* [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore
@ 2019-10-02 11:19 Chris Wilson
  2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
                   ` (34 more replies)
  0 siblings, 35 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

If execlists' lite-restore is based on the common GEM context tag
rather than the per-intel_context LRCA, then a context switch between
two intel_contexts on the same engine derived from the same GEM context
will perform a lite-restore instead of a full context switch. We can
exploit this by poisoning the ringbuffer of the first context and trying
to trick the hardware into a simple RING_TAIL update (i.e. a lite-restore).
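
Schematically, the hazard reduces to how the hardware compares
consecutive ELSP submissions (a hedged sketch with hypothetical helper
names, not the driver's actual code):

	if (ctx_tag(new_desc) == ctx_tag(old_desc)) {
		/* Lite-restore: HW samples only the new RING_TAIL and
		 * keeps executing from the *old* ring. If the tag is the
		 * shared GEM context id rather than the per-intel_context
		 * LRCA, two distinct contexts falsely match and the
		 * (poisoned) old ring is executed. */
	} else {
		/* Full context save/restore. */
	}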

v2: Also check what happens if we preempt ce[0] with ce[1] (both
instances on the same engine from the same parent context) [Tvrtko]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  18 ++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 184 +++++++++++++++++++++++++
 2 files changed, 198 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9877b6ba13df..431d3b8c3371 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -235,6 +235,16 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     const struct intel_ring *ring,
 				     bool close);
 
+static void __context_pin_acquire(struct intel_context *ce)
+{
+	mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_);
+}
+
+static void __context_pin_release(struct intel_context *ce)
+{
+	mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_);
+}
+
 static void mark_eio(struct i915_request *rq)
 {
 	if (!i915_request_signaled(rq))
@@ -2761,7 +2771,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	/* Proclaim we have exclusive access to the context image! */
-	mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_);
+	__context_pin_acquire(ce);
 
 	rq = active_request(rq);
 	if (!rq) {
@@ -2825,7 +2835,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	__execlists_reset_reg_state(ce, engine);
 	__execlists_update_reg_state(ce, engine);
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
-	mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_);
+	__context_pin_release(ce);
 
 unwind:
 	/* Push back any incomplete requests for replay after the reset. */
@@ -4439,7 +4449,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 			    bool scrub)
 {
 	GEM_BUG_ON(!intel_context_is_pinned(ce));
-	mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_);
+	__context_pin_acquire(ce);
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -4465,7 +4475,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 	intel_ring_update_space(ce->ring);
 
 	__execlists_update_reg_state(ce, engine);
-	mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_);
+	__context_pin_release(ce);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 93f2fcdc49bf..2da4fa5149cb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -79,6 +79,188 @@ static int live_sanitycheck(void *arg)
 	return err;
 }
 
+static int live_unlite_restore(struct drm_i915_private *i915, int prio)
+{
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct igt_spinner spin;
+	int err = -ENOMEM;
+
+	/*
+	 * Check that we can correctly context switch between 2 instances
+	 * on the same engine from the same parent context.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	if (igt_spinner_init(&spin, &i915->gt))
+		goto err_unlock;
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	err = 0;
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce[2] = {};
+		struct i915_request *rq[2];
+		struct igt_live_test t;
+		int n;
+
+		if (prio && !intel_engine_has_preemption(engine))
+			continue;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+			err = -EIO;
+			break;
+		}
+
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			struct intel_context *tmp;
+
+			tmp = intel_context_create(ctx, engine);
+			if (IS_ERR(tmp)) {
+				err = PTR_ERR(tmp);
+				goto err_ce;
+			}
+
+			err = intel_context_pin(tmp);
+			if (err) {
+				intel_context_put(tmp);
+				goto err_ce;
+			}
+
+			/*
+			 * Setup the pair of contexts such that if we
+			 * lite-restore using the RING_TAIL from ce[1] it
+			 * will execute garbage from ce[0]->ring.
+			 */
+			memset(tmp->ring->vaddr,
+			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+			       tmp->ring->vma->size);
+
+			ce[n] = tmp;
+		}
+		GEM_BUG_ON(!ce[1]->ring->size);
+		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+
+		local_bh_disable(); /* appease lockdep */
+		__context_pin_acquire(ce[1]);
+		__execlists_update_reg_state(ce[1], engine);
+		__context_pin_release(ce[1]);
+		local_bh_enable();
+
+		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto err_ce;
+		}
+
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+		GEM_BUG_ON(rq[0]->tail > ce[1]->ring->emit);
+
+		if (!igt_wait_for_spinner(&spin, rq[0])) {
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		rq[1] = i915_request_create(ce[1]);
+		if (IS_ERR(rq[1])) {
+			err = PTR_ERR(rq[1]);
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		if (!prio) {
+			/*
+			 * Ensure we do the switch to ce[1] on completion.
+			 *
+			 * rq[0] is already submitted, so this should reduce
+			 * to a no-op (a wait on a request on the same engine
+			 * uses the submit fence, not the completion fence),
+			 * but it will install a dependency on rq[1] for rq[0]
+			 * that will prevent the pair being reordered by
+			 * timeslicing.
+			 */
+			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+		}
+
+		i915_request_get(rq[1]);
+		i915_request_add(rq[1]);
+		GEM_TRACE("rq[0]:{ head:%x, tail:%x }, rq[1]:{ head:%x, tail:%x }\n",
+			  rq[0]->head, rq[0]->tail,
+			  rq[1]->head, rq[1]->tail);
+		GEM_BUG_ON(rq[1]->tail <= rq[0]->tail);
+		i915_request_put(rq[0]);
+
+		if (prio) {
+			struct i915_sched_attr attr = {
+				.priority = prio,
+			};
+
+			/* Alternatively preempt the spinner with ce[1] */
+			engine->schedule(rq[1], &attr);
+		}
+
+		/* And switch back to ce[0] for good measure */
+		rq[0] = i915_request_create(ce[0]);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			i915_request_put(rq[1]);
+			goto err_ce;
+		}
+
+		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+		i915_request_add(rq[0]);
+		GEM_TRACE("rq[1]:{ head:%x, tail:%x }, rq[0]:{ head:%x, tail:%x }\n",
+			  rq[1]->head, rq[1]->tail,
+			  rq[0]->head, rq[0]->tail);
+		GEM_BUG_ON(rq[0]->tail > rq[1]->tail);
+		i915_request_put(rq[1]);
+
+err_ce:
+		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+		igt_spinner_end(&spin);
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			if (IS_ERR_OR_NULL(ce[n]))
+				break;
+
+			intel_context_unpin(ce[n]);
+			intel_context_put(ce[n]);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	kernel_context_close(ctx);
+err_spin:
+	igt_spinner_fini(&spin);
+err_unlock:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_unlite_switch(void *arg)
+{
+	return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -2178,6 +2360,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
+		SUBTEST(live_unlite_switch),
+		SUBTEST(live_unlite_preempt),
 		SUBTEST(live_timeslice_preempt),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
-- 
2.23.0

* [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
                   ` (33 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Make dma_fence_enable_sw_signaling() behave like its
dma_fence_add_callback() and dma_fence_default_wait() counterparts and
perform the test to enable signaling under the fence->lock, along with
the action to do so. This ensures that should an implementation be trying
to flush the cb_list (by signaling) on retirement before freeing the
fence, it can do so in a race-free manner.

See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked
with dma_fence_signal").
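
For illustration, a retirement path that relies on this serialisation
could then safely flush the callback list before dropping its reference
(a sketch with a hypothetical retire_fence(), not code from this series):

	static void retire_fence(struct dma_fence *fence)
	{
		unsigned long flags;

		spin_lock_irqsave(fence->lock, flags);
		dma_fence_signal_locked(fence); /* runs cb_list under the lock */
		spin_unlock_irqrestore(fence->lock, flags);

		/* dma_fence_enable_sw_signaling() can no longer race in a
		 * late ->enable_signaling() against the signaled fence. */
		dma_fence_put(fence);
	}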

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 2c136aee3e79..587727089134 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -285,19 +285,18 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
 {
 	unsigned long flags;
 
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return;
+
+	spin_lock_irqsave(fence->lock, flags);
 	if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			      &fence->flags) &&
-	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
 	    fence->ops->enable_signaling) {
 		trace_dma_fence_enable_signal(fence);
-
-		spin_lock_irqsave(fence->lock, flags);
-
 		if (!fence->ops->enable_signaling(fence))
 			dma_fence_signal_locked(fence);
-
-		spin_unlock_irqrestore(fence->lock, flags);
 	}
+	spin_unlock_irqrestore(fence->lock, flags);
 }
 EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
 
-- 
2.23.0

* [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
  2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 04/30] drm/i915: Only track bound elements of the GTT Chris Wilson
                   ` (32 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

The ulterior motive for switching the booleans over to bitops is to
allow use of the allocated flag as a bitlock.
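
A minimal sketch of that future bitlock usage (the trylock helper below
is hypothetical and not part of this patch):

	static bool drm_mm_node_trylock(struct drm_mm_node *node)
	{
		/* acquire: atomically claim the allocated bit */
		return !test_and_set_bit_lock(DRM_MM_NODE_ALLOCATED_BIT,
					      &node->flags);
	}

	/* Paired release; this is why drm_mm_remove_node() in the diff
	 * switches to clear_bit_unlock() rather than a plain clear. */
	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);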

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_mm.c                      | 36 +++++++++++--------
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++--
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               | 16 ++++-----
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               |  2 +-
 drivers/gpu/drm/i915/i915_vma.h               |  4 +--
 drivers/gpu/drm/selftests/test-drm_mm.c       | 14 ++++----
 drivers/gpu/drm/vc4/vc4_crtc.c                |  2 +-
 drivers/gpu/drm/vc4/vc4_hvs.c                 |  2 +-
 drivers/gpu/drm/vc4/vc4_plane.c               |  4 +--
 include/drm/drm_mm.h                          |  7 ++--
 12 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 4581c5387372..211967006cec 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -174,7 +174,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 
 	node->__subtree_last = LAST(node);
 
-	if (hole_node->allocated) {
+	if (drm_mm_node_allocated(hole_node)) {
 		rb = &hole_node->rb;
 		while (rb) {
 			parent = rb_entry(rb, struct drm_mm_node, rb);
@@ -424,9 +424,9 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 
 	node->mm = mm;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 	list_add(&node->node_list, &hole->node_list);
 	drm_mm_interval_tree_add_node(hole, node);
-	node->allocated = true;
 	node->hole_size = 0;
 
 	rm_hole(hole);
@@ -543,9 +543,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 		node->color = color;
 		node->hole_size = 0;
 
+		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 		list_add(&node->node_list, &hole->node_list);
 		drm_mm_interval_tree_add_node(hole, node);
-		node->allocated = true;
 
 		rm_hole(hole);
 		if (adj_start > hole_start)
@@ -561,6 +561,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range);
 
+static inline bool drm_mm_node_scanned_block(const struct drm_mm_node *node)
+{
+	return test_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+}
+
 /**
  * drm_mm_remove_node - Remove a memory node from the allocator.
  * @node: drm_mm_node to remove
@@ -574,8 +579,8 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
 
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
 
 	prev_node = list_prev_entry(node, node_list);
 
@@ -584,11 +589,12 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 
 	drm_mm_interval_tree_remove(node, &mm->interval_tree);
 	list_del(&node->node_list);
-	node->allocated = false;
 
 	if (drm_mm_hole_follows(prev_node))
 		rm_hole(prev_node);
 	add_hole(prev_node);
+
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
@@ -605,7 +611,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
 	struct drm_mm *mm = old->mm;
 
-	DRM_MM_BUG_ON(!old->allocated);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(old));
 
 	*new = *old;
 
@@ -622,8 +628,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 				&mm->holes_addr);
 	}
 
-	old->allocated = false;
-	new->allocated = true;
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
 }
 EXPORT_SYMBOL(drm_mm_replace_node);
 
@@ -731,9 +736,9 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
 	u64 adj_start, adj_end;
 
 	DRM_MM_BUG_ON(node->mm != mm);
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
-	node->scanned_block = true;
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
+	__set_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
 	mm->scan_active++;
 
 	/* Remove this block from the node_list so that we enlarge the hole
@@ -818,8 +823,7 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 	struct drm_mm_node *prev_node;
 
 	DRM_MM_BUG_ON(node->mm != scan->mm);
-	DRM_MM_BUG_ON(!node->scanned_block);
-	node->scanned_block = false;
+	DRM_MM_BUG_ON(!drm_mm_node_scanned_block(node));
 
 	DRM_MM_BUG_ON(!node->mm->scan_active);
 	node->mm->scan_active--;
@@ -837,6 +841,8 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 		      list_next_entry(node, node_list));
 	list_add(&node->node_list, &prev_node->node_list);
 
+	__clear_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+
 	return (node->start + node->size > scan->hit_start &&
 		node->start < scan->hit_end);
 }
@@ -917,7 +923,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
 	INIT_LIST_HEAD(&mm->head_node.node_list);
-	mm->head_node.allocated = false;
+	mm->head_node.flags = 0;
 	mm->head_node.mm = mm;
 	mm->head_node.start = start + size;
 	mm->head_node.size = -size;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 27dbcb508055..c049199a1df5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -906,7 +906,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
-	cache->node.allocated = false;
+	cache->node.flags = 0;
 	cache->ce = NULL;
 	cache->rq = NULL;
 	cache->rq_size = 0;
@@ -968,7 +968,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 
-		if (cache->node.allocated) {
+		if (drm_mm_node_allocated(&cache->node)) {
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
@@ -1061,7 +1061,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	}
 
 	offset = cache->node.start;
-	if (cache->node.allocated) {
+	if (drm_mm_node_allocated(&cache->node)) {
 		ggtt->vm.insert_page(&ggtt->vm,
 				     i915_gem_object_get_dma_address(obj, page),
 				     offset, I915_CACHE_NONE, 0);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bb878119f06c..bb4889d2346d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -387,7 +387,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
 {
 	struct drm_mm_node *node = &ggtt->uc_fw;
 
-	GEM_BUG_ON(!node->allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(node));
 	GEM_BUG_ON(upper_32_bits(node->start));
 	GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1426e506700d..7046067f70c1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -351,12 +351,12 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_unlock;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -393,7 +393,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		unsigned page_offset = offset_in_page(offset);
 		unsigned page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			ggtt->vm.insert_page(&ggtt->vm,
 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
 					     node.start, I915_CACHE_NONE, 0);
@@ -415,7 +415,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
 	mutex_lock(&i915->drm.struct_mutex);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 		remove_mappable_node(&node);
 	} else {
@@ -561,12 +561,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_rpm;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -604,7 +604,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		unsigned int page_offset = offset_in_page(offset);
 		unsigned int page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			/* flush the write before we modify the GGTT */
 			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 			ggtt->vm.insert_page(&ggtt->vm,
@@ -636,7 +636,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 out_unpin:
 	mutex_lock(&i915->drm.struct_mutex);
 	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 		remove_mappable_node(&node);
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index e76c9da9992d..8c1e04f402bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -299,7 +299,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 			break;
 		}
 
-		GEM_BUG_ON(!node->allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(node));
 		vma = container_of(node, typeof(*vma), node);
 
 		/* If we are using coloring to insert guard pages between
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 9d5b0f87c210..68c34b1a20e4 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -795,7 +795,7 @@ void i915_vma_reopen(struct i915_vma *vma)
 
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(vma->node.allocated);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->fence);
 
 	mutex_lock(&vma->vm->mutex);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8bcb5812c446..e49b199f7de7 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -228,7 +228,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	GEM_BUG_ON(!vma->node.allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(upper_32_bits(vma->node.start));
 	GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
 	return lower_32_bits(vma->node.start);
@@ -390,7 +390,7 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma,
 static inline bool i915_node_color_differs(const struct drm_mm_node *node,
 					   unsigned long color)
 {
-	return node->allocated && node->color != color;
+	return drm_mm_node_allocated(node) && node->color != color;
 }
 
 /**
diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c
index 388f9844f4ba..9aabe82dcd3a 100644
--- a/drivers/gpu/drm/selftests/test-drm_mm.c
+++ b/drivers/gpu/drm/selftests/test-drm_mm.c
@@ -854,7 +854,7 @@ static bool assert_contiguous_in_range(struct drm_mm *mm,
 
 	if (start > 0) {
 		node = __drm_mm_interval_first(mm, 0, start - 1);
-		if (node->allocated) {
+		if (drm_mm_node_allocated(node)) {
 			pr_err("node before start: node=%llx+%llu, start=%llx\n",
 			       node->start, node->size, start);
 			return false;
@@ -863,7 +863,7 @@ static bool assert_contiguous_in_range(struct drm_mm *mm,
 
 	if (end < U64_MAX) {
 		node = __drm_mm_interval_first(mm, end, U64_MAX);
-		if (node->allocated) {
+		if (drm_mm_node_allocated(node)) {
 			pr_err("node after end: node=%llx+%llu, end=%llx\n",
 			       node->start, node->size, end);
 			return false;
@@ -1156,12 +1156,12 @@ static void show_holes(const struct drm_mm *mm, int count)
 		struct drm_mm_node *next = list_next_entry(hole, node_list);
 		const char *node1 = NULL, *node2 = NULL;
 
-		if (hole->allocated)
+		if (drm_mm_node_allocated(hole))
 			node1 = kasprintf(GFP_KERNEL,
 					  "[%llx + %lld, color=%ld], ",
 					  hole->start, hole->size, hole->color);
 
-		if (next->allocated)
+		if (drm_mm_node_allocated(next))
 			node2 = kasprintf(GFP_KERNEL,
 					  ", [%llx + %lld, color=%ld]",
 					  next->start, next->size, next->color);
@@ -1900,18 +1900,18 @@ static void separate_adjacent_colors(const struct drm_mm_node *node,
 				     u64 *start,
 				     u64 *end)
 {
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		++*start;
 
 	node = list_next_entry(node, node_list);
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		--*end;
 }
 
 static bool colors_abutt(const struct drm_mm_node *node)
 {
 	if (!drm_mm_hole_follows(node) &&
-	    list_next_entry(node, node_list)->allocated) {
+	    drm_mm_node_allocated(list_next_entry(node, node_list))) {
 		pr_err("colors abutt; %ld [%llx + %llx] is next to %ld [%llx + %llx]!\n",
 		       node->color, node->start, node->size,
 		       list_next_entry(node, node_list)->color,
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index f1f0a7c87771..b00e20f5ce05 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -994,7 +994,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
 	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
 	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
 
-	if (vc4_state->mm.allocated) {
+	if (drm_mm_node_allocated(&vc4_state->mm)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 9936b15d0bf1..5a43659da319 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -315,7 +315,7 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 
-	if (vc4->hvs->mitchell_netravali_filter.allocated)
+	if (drm_mm_node_allocated(&vc4->hvs->mitchell_netravali_filter))
 		drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
 
 	drm_mm_takedown(&vc4->hvs->dlist_mm);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 5e5f90810aca..4934127f0d76 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -178,7 +178,7 @@ static void vc4_plane_destroy_state(struct drm_plane *plane,
 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
-	if (vc4_state->lbm.allocated) {
+	if (drm_mm_node_allocated(&vc4_state->lbm)) {
 		unsigned long irqflags;
 
 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
@@ -557,7 +557,7 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
 	/* Allocate the LBM memory that the HVS will use for temporary
 	 * storage due to our scaling/format conversion.
 	 */
-	if (!vc4_state->lbm.allocated) {
+	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
 		int ret;
 
 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 2c3bbb43c7d1..d7939c054259 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -168,8 +168,9 @@ struct drm_mm_node {
 	struct rb_node rb_hole_addr;
 	u64 __subtree_last;
 	u64 hole_size;
-	bool allocated : 1;
-	bool scanned_block : 1;
+	unsigned long flags;
+#define DRM_MM_NODE_ALLOCATED_BIT	0
+#define DRM_MM_NODE_SCANNED_BIT		1
 #ifdef CONFIG_DRM_DEBUG_MM
 	depot_stack_handle_t stack;
 #endif
@@ -253,7 +254,7 @@ struct drm_mm_scan {
  */
 static inline bool drm_mm_node_allocated(const struct drm_mm_node *node)
 {
-	return node->allocated;
+	return test_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 
 /**
-- 
2.23.0

* [PATCH 04/30] drm/i915: Only track bound elements of the GTT
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
  2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
  2019-10-02 11:19 ` [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
                   ` (31 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

The premise here is simply to avoid having to acquire the vm->mutex
inside vma create/destroy to update the vm->unbound_list, thereby
avoiding some nasty lock recursions later.
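
The recursion being dodged looks roughly like this (an illustrative
call chain, not an observed trace):

	mutex_lock(&vm->mutex)               /* e.g. inside vma create */
	  kmalloc(GFP_KERNEL)
	    -> direct reclaim
	      -> i915 shrinker unbinds a vma
	        -> i915_vma_destroy()
	          -> mutex_lock(&vm->mutex)  /* recursion */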

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 23 ++++---------------
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  5 ----
 drivers/gpu/drm/i915/i915_vma.c               | 12 ++--------
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 5 files changed, 8 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index bfbc3e3daf92..e45eb8721850 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -692,7 +692,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	__i915_vma_set_map_and_fenceable(vma);
 
 	mutex_lock(&ggtt->vm.mutex);
-	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
 	GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e62e9d1a1307..ad9eb2d68f3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -505,19 +505,12 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
-	struct list_head *phases[] = {
-		&vm->bound_list,
-		&vm->unbound_list,
-		NULL,
-	}, **phase;
+	struct i915_vma *vma, *vn;
 
 	mutex_lock(&vm->i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		struct i915_vma *vma, *vn;
-
-		list_for_each_entry_safe(vma, vn, *phase, vm_link)
-			i915_vma_destroy(vma);
-	}
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
+		i915_vma_destroy(vma);
+	GEM_BUG_ON(!list_empty(&vm->bound_list));
 	mutex_unlock(&vm->i915->drm.struct_mutex);
 }
 
@@ -528,9 +521,6 @@ static void __i915_vm_release(struct work_struct *work)
 
 	ppgtt_destroy_vma(vm);
 
-	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	GEM_BUG_ON(!list_empty(&vm->unbound_list));
-
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -569,7 +559,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	stash_init(&vm->free_pages);
 
-	INIT_LIST_HEAD(&vm->unbound_list);
 	INIT_LIST_HEAD(&vm->bound_list);
 }
 
@@ -1887,10 +1876,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	INIT_LIST_HEAD(&vma->obj_link);
 	INIT_LIST_HEAD(&vma->closed_link);
 
-	mutex_lock(&vma->vm->mutex);
-	list_add(&vma->vm_link, &vma->vm->unbound_list);
-	mutex_unlock(&vma->vm->mutex);
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8fd2234ba0bf..bbdc735466c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -320,11 +320,6 @@ struct i915_address_space {
 	 */
 	struct list_head bound_list;
 
-	/**
-	 * List of vma that are not unbound.
-	 */
-	struct list_head unbound_list;
-
 	struct pagestash free_pages;
 
 	/* Global GTT */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 68c34b1a20e4..d097f77890ba 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -218,10 +218,6 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	spin_unlock(&obj->vma.lock);
 
-	mutex_lock(&vm->mutex);
-	list_add(&vma->vm_link, &vm->unbound_list);
-	mutex_unlock(&vm->mutex);
-
 	return vma;
 
 err_vma:
@@ -657,7 +653,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
@@ -685,7 +681,7 @@ i915_vma_remove(struct i915_vma *vma)
 
 	mutex_lock(&vma->vm->mutex);
 	drm_mm_remove_node(&vma->node);
-	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
 
 	/*
@@ -798,10 +794,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->fence);
 
-	mutex_lock(&vma->vm->mutex);
-	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
-
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 0d40e0b42923..84d94eabc7dd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1242,7 +1242,7 @@ static void track_vma_bind(struct i915_vma *vma)
 	vma->pages = obj->mm.pages;
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 }
 
-- 
2.23.0

* [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (2 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 04/30] drm/i915: Only track bound elements of the GTT Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:47   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
                   ` (30 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Since we cannot allocate underneath the vm->mutex (it is used in the
direct-reclaim paths), we need to shift the allocations off into a
mutexless worker with fence recursion prevention. To know when we need
this protection, we mark up the address spaces that do allocate before
insertion. In the future, we may wish to extend the async bind scheme to
more than just allocations.
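
A sketch of the intended consumer (hedged; the real check lands with
the async-bind machinery in the next patch):

	if (bind_flags & vma->vm->bind_async_flags) {
		/* Insertion will allocate page directories: defer the
		 * PTE writes to a preallocated, mutexless worker. */
	} else {
		/* No allocation required: bind synchronously. */
	}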

v2: s/vm->bind_alloc/vm->bind_async_flags/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
 drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad9eb2d68f3f..8eba63ecdb03 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1502,6 +1502,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 			goto err_free_pd;
 	}
 
+	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
@@ -1950,6 +1951,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 	ppgtt_init(&ppgtt->base, &i915->gt);
 	ppgtt->base.vm.top = 1;
 
+	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
@@ -2581,6 +2583,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 		goto err_ppgtt;
 
 	ggtt->alias = ppgtt;
+	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index bbdc735466c1..3502b9c85a8e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -305,6 +305,8 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
+	unsigned int bind_async_flags;
+
 	bool closed;
 
 	struct mutex mutex; /* protects vma and our lists */
-- 
2.23.0

* [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (3 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker Chris Wilson
                   ` (29 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do the allocation and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbinds).
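
Schematically, the resulting pin path is (a hedged outline; helper
names here are illustrative):

	work = preallocate_bind_worker();  /* may allocate: outside mutex */
	mutex_lock(&vma->vm->mutex);
	err = i915_vma_insert(vma, size, alignment, flags); /* slot only */
	queue_bind_worker(work, vma);      /* PTE writes behind a fence */
	mutex_unlock(&vma->vm->mutex);
	/* GPU use and later unbinds are ordered against the worker's
	 * fence, never the other way around. */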

In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is,
the asynchronous binding applies only to the vma timeline itself and not
to the pages, as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple: we keep the fence for the vma binding separate,
only wait on it as required, and never add it to the obj->resv itself.
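
Concretely, "only wait on it as required" appears in this patch as
explicit syncs on the vma's own timeline, e.g. in the set-cache-level
hunk below (sketch of the pattern):

	/* Rewriting PTEs must wait for a still-running async bind, but
	 * readers of obj->resv never see the bind fence at all. */
	ret = i915_vma_sync(vma);
	if (ret)
		return ret;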

Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!

v2: Add some commentary, and some helpers to reduce patch churn.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  29 +-
 drivers/gpu/drm/i915/display/intel_dsb.c      |   7 +-
 drivers/gpu/drm/i915/display/intel_fbdev.c    |   8 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |  11 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  20 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  13 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  43 +-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  20 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  33 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   5 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  73 +--
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  27 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  27 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |   9 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  12 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   4 -
 .../drm/i915/gem/selftests/igt_gem_utils.c    |   7 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   5 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  19 +-
 drivers/gpu/drm/i915/gvt/aperture_gm.c        |  12 +-
 drivers/gpu/drm/i915/i915_active.c            |  95 +++-
 drivers/gpu/drm/i915/i915_active.h            |   7 +
 drivers/gpu/drm/i915/i915_active_types.h      |   5 +
 drivers/gpu/drm/i915/i915_drv.c               |   2 -
 drivers/gpu/drm/i915/i915_gem.c               |  83 ++-
 drivers/gpu/drm/i915/i915_gem_evict.c         |  28 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   9 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 109 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  45 +-
 drivers/gpu/drm/i915/i915_perf.c              |  32 +-
 drivers/gpu/drm/i915/i915_vma.c               | 524 ++++++++++++------
 drivers/gpu/drm/i915/i915_vma.h               |  84 +--
 drivers/gpu/drm/i915/selftests/i915_gem.c     |   2 -
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  36 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  58 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   7 +
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   6 +-
 40 files changed, 824 insertions(+), 706 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d99c59e97568..c64c7045de28 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2079,7 +2079,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	unsigned int pinctl;
 	u32 alignment;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 	if (WARN_ON(!i915_gem_object_is_framebuffer(obj)))
 		return ERR_PTR(-EINVAL);
 
@@ -2163,8 +2162,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
@@ -3065,12 +3062,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 		return false;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	obj = i915_gem_object_create_stolen_for_preallocated(dev_priv,
 							     base_aligned,
 							     base_aligned,
 							     size_aligned);
-	mutex_unlock(&dev->struct_mutex);
 	if (!obj)
 		return false;
 
@@ -3232,13 +3227,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	intel_state->color_plane[0].stride =
 		intel_fb_pitch(fb, 0, intel_state->base.rotation);
 
-	mutex_lock(&dev->struct_mutex);
 	intel_state->vma =
 		intel_pin_and_fence_fb_obj(fb,
 					   &intel_state->view,
 					   intel_plane_uses_fence(intel_state),
 					   &intel_state->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(intel_state->vma)) {
 		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
 			  intel_crtc->pipe, PTR_ERR(intel_state->vma));
@@ -14365,8 +14358,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
  * bits.  Some older platforms need special physical address handling for
  * cursor planes.
  *
- * Must be called with struct_mutex held.
- *
  * Returns 0 on success, negative error code on failure.
  */
 int
@@ -14423,15 +14414,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	if (ret)
 		return ret;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		return ret;
-	}
-
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
@@ -14480,8 +14464,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
  * @old_state: the state from the previous modeset
  *
  * Cleans up a framebuffer that has just been removed from a plane.
- *
- * Must be called with struct_mutex held.
  */
 void
 intel_cleanup_plane_fb(struct drm_plane *plane,
@@ -14497,9 +14479,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 	}
 
 	/* Should only be called after a successful intel_prepare_plane_fb()! */
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_plane_unpin_fb(to_intel_plane_state(old_state));
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 int
@@ -14702,7 +14682,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 			   u32 src_w, u32 src_h,
 			   struct drm_modeset_acquire_ctx *ctx)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct intel_crtc_state *crtc_state =
@@ -14768,13 +14747,9 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	if (ret)
 		goto out_free;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		goto out_free;
-
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
 	if (ret)
-		goto out_unlock;
+		goto out_free;
 
 	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
 	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
@@ -14804,8 +14779,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 
 	intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
 
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 out_free:
 	if (new_crtc_state)
 		intel_crtc_destroy_state(crtc, &new_crtc_state->base);
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 0a0a1536ac96..bb5a0e91b370 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -119,9 +119,7 @@ intel_dsb_get(struct intel_crtc *crtc)
 		goto err;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(vma)) {
 		DRM_ERROR("Vma creation failed\n");
 		i915_gem_object_put(obj);
@@ -164,10 +162,7 @@ void intel_dsb_put(struct intel_dsb *dsb)
 		return;
 
 	if (atomic_dec_and_test(&dsb->refcount)) {
-		mutex_lock(&i915->drm.struct_mutex);
-		i915_gem_object_unpin_map(dsb->vma->obj);
-		i915_vma_unpin_and_release(&dsb->vma, 0);
-		mutex_unlock(&i915->drm.struct_mutex);
+		i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP);
 		dsb->cmd_buf = NULL;
 		dsb->free_pos = 0;
 		dsb->ins_start_offset = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 68338669f054..97cde017670a 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		sizes->fb_height = intel_fb->base.height;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
@@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma_flags = flags;
 
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
 
@@ -274,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	if (ifbdev->vma) {
-		mutex_lock(&ifbdev->helper.dev->struct_mutex);
+	if (ifbdev->vma)
 		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
-		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
-	}
 
 	if (ifbdev->fb)
 		drm_framebuffer_remove(&ifbdev->fb->base);
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 5efef9babadb..3f4ac1ee7668 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1303,15 +1303,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	struct i915_vma *vma;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
 	if (obj == NULL)
 		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1332,13 +1328,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
-	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 7f61a8024133..c1fca5728e6e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_add_request(&vma->active, rq);
+	err = __i915_vma_move_to_active(vma, rq);
 	if (err)
 		goto out_request;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f7ba0935ed67..95f8e66e45db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -313,8 +313,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
 	release_hw_id(ctx);
-	if (ctx->vm)
-		i915_vm_put(ctx->vm);
 
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
@@ -379,9 +377,13 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	i915_gem_context_set_closed(ctx);
+
+	if (ctx->vm)
+		i915_vm_close(ctx->vm);
+
 	mutex_lock(&ctx->mutex);
 
-	i915_gem_context_set_closed(ctx);
 	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
@@ -474,7 +476,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 
 	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
 
-	ctx->vm = i915_vm_get(vm);
+	ctx->vm = i915_vm_open(vm);
 	context_apply_all(ctx, __apply_ppgtt, vm);
 
 	return old;
@@ -488,7 +490,7 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 	vm = __set_ppgtt(ctx, vm);
 	if (vm)
-		i915_vm_put(vm);
+		i915_vm_close(vm);
 }
 
 static void __set_timeline(struct intel_timeline **dst,
@@ -953,7 +955,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	if (ret < 0)
 		goto err_unlock;
 
-	i915_vm_get(vm);
+	i915_vm_open(vm);
 
 	args->size = 0;
 	args->value = ret;
@@ -973,7 +975,7 @@ static void set_ppgtt_barrier(void *data)
 	if (INTEL_GEN(old->i915) < 8)
 		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
 
-	i915_vm_put(old);
+	i915_vm_close(old);
 }
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
@@ -1090,8 +1092,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 				   set_ppgtt_barrier,
 				   old);
 	if (err) {
-		i915_vm_put(__set_ppgtt(ctx, old));
-		i915_vm_put(old);
+		i915_vm_close(__set_ppgtt(ctx, old));
+		i915_vm_close(old);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 55c3ab59e3aa..9937b4c341f1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -288,7 +288,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			/* Wait for an earlier async bind, need to rewrite it */
+			ret = i915_vma_sync(vma);
+			if (ret)
+				return ret;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL);
 			if (ret)
 				return ret;
 		}
@@ -391,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		goto out;
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret == 0) {
 		ret = i915_gem_object_set_cache_level(obj, level);
 		i915_gem_object_unlock(obj);
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 out:
 	i915_gem_object_put(obj);
@@ -485,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
+		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
 		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c049199a1df5..88a881be12ec 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -698,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
 
 		case 1:
 			/* Too fragmented, unbind everything and retry */
+			mutex_lock(&eb->context->vm->mutex);
 			err = i915_gem_evict_vm(eb->context->vm);
+			mutex_unlock(&eb->context->vm->mutex);
 			if (err)
 				return err;
 			break;
@@ -972,7 +974,9 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
+			mutex_lock(&ggtt->vm.mutex);
 			drm_mm_remove_node(&cache->node);
+			mutex_unlock(&ggtt->vm.mutex);
 		} else {
 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 		}
@@ -1047,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 		if (IS_ERR(vma)) {
 			memset(&cache->node, 0, sizeof(cache->node));
+			mutex_lock(&ggtt->vm.mutex);
 			err = drm_mm_insert_node_in_range
 				(&ggtt->vm.mm, &cache->node,
 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				 0, ggtt->mappable_end,
 				 DRM_MM_INSERT_LOW);
+			mutex_unlock(&ggtt->vm.mutex);
 			if (err) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
@@ -1416,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 		    IS_GEN(eb->i915, 6)) {
 			err = i915_vma_bind(target, target->obj->cache_level,
-					    PIN_GLOBAL);
+					    PIN_GLOBAL, NULL);
 			if (WARN_ONCE(err,
 				      "Unexpected failure to bind target VMA!"))
 				return err;
@@ -2140,35 +2146,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
 	return i915_request_get(rq);
 }
 
-static int
-__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
-	int err;
-
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
-		return 0;
-
-	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
-	if (err)
-		return err;
-
-	err = __intel_context_do_pin(ce);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-
-	return err;
-}
-
-static void
-__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
-	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
-		return;
-
-	mutex_lock(&eb->i915->drm.struct_mutex);
-	intel_context_unpin(ce);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-}
-
 static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	struct intel_timeline *tl;
@@ -2188,7 +2165,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = __eb_pin_context(eb, ce);
+	err = intel_context_pin(ce);
 	if (err)
 		return err;
 
@@ -2232,7 +2209,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	intel_context_exit(ce);
 	intel_context_timeline_unlock(tl);
 err_unpin:
-	__eb_unpin_context(eb, ce);
+	intel_context_unpin(ce);
 	return err;
 }
 
@@ -2245,7 +2222,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	__eb_unpin_context(eb, ce);
+	intel_context_unpin(ce);
 }
 
 static unsigned int
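
[Aside: the deleted __eb_pin_context()/__eb_unpin_context() pair
open-coded a common pin idiom: an atomic fast path for already-pinned
contexts plus a mutex-guarded slow path for the first pin. After this
patch intel_context_pin() is expected to provide the same behaviour
internally, without struct_mutex. A sketch of the idiom; first_pin() is
a hypothetical stand-in:

static int example_pin(struct intel_context *ce)
{
	int err = 0;

	/* Fast path: no lock if someone else already holds a pin. */
	if (likely(atomic_inc_not_zero(&ce->pin_count)))
		return 0;

	/* Slow path: serialise the first pin against the last unpin. */
	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		return err;

	if (!atomic_read(&ce->pin_count))
		err = first_pin(ce); /* hypothetical: map context state */
	if (!err)
		atomic_inc(&ce->pin_count);

	mutex_unlock(&ce->pin_mutex);
	return err;
}
]
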
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index dd0c2840ba4d..c19431d609fc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -249,16 +249,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_rpm;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
 	/* Now pin it into the GTT as needed */
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
@@ -291,7 +281,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto err_unlock;
+		goto err_reset;
+	}
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unpin;
 	}
 
 	ret = i915_vma_pin_fence(vma);
@@ -329,8 +325,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	i915_vma_unpin_fence(vma);
 err_unpin:
 	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
 err_reset:
 	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
 err_rpm:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 0ef60dae23a7..dbf9be9a79f4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
 		trace_i915_gem_object_destroy(obj);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
-			i915_vma_destroy(vma);
+		if (!list_empty(&obj->vma.list)) {
+			struct i915_vma *vma;
+
+			/*
+			 * Note that the vma keeps an object reference while
+			 * it is active, so it *should* not sleep while we
+			 * destroy it. Our debug code insists it *might*.
+			 * For the moment, play along.
+			 */
+			spin_lock(&obj->vma.lock);
+			while ((vma = list_first_entry_or_null(&obj->vma.list,
+							       struct i915_vma,
+							       obj_link))) {
+				GEM_BUG_ON(vma->obj != obj);
+				spin_unlock(&obj->vma.lock);
+
+				i915_vma_destroy(vma);
+
+				spin_lock(&obj->vma.lock);
+			}
+			spin_unlock(&obj->vma.lock);
 		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
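
[Aside: the loop above is the standard drain-under-spinlock pattern:
pop the head entry, drop the lock around the destructor (which may
sleep), then reacquire and restart from the head, since the list may
have changed while unlocked. In generic form, with struct foo and
destroy() as placeholders:

spin_lock(&lock);
while ((entry = list_first_entry_or_null(&head, struct foo, link))) {
	spin_unlock(&lock);	/* the destructor may sleep */
	destroy(entry);		/* unlinks entry from the list itself */
	spin_lock(&lock);	/* restart: the head may have changed */
}
spin_unlock(&lock);
]
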
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index aec05f967d9d..a70fbbd8c98d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
 	dma_resv_lock(obj->base.resv, NULL);
 }
 
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+	return dma_resv_trylock(obj->base.resv);
+}
+
 static inline int
 i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
 {
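
[Aside: the new trylock wrapper gives reclaim-style paths a way to take
the dma_resv lock opportunistically instead of blocking. A hypothetical
caller:

/* Hypothetical shrinker-style use: never block on an object lock. */
if (!i915_gem_object_trylock(obj))
	return false;		/* contended, skip this object */

/* ... unbind or purge under the lock ... */
i915_gem_object_unlock(obj);
]
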
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index d2c05d752909..fd3ce6da8497 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -16,40 +16,6 @@
 
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915,
-			  unsigned int flags,
-			  bool *unlock)
-{
-	struct mutex *m = &i915->drm.struct_mutex;
-
-	switch (mutex_trylock_recursive(m)) {
-	case MUTEX_TRYLOCK_RECURSIVE:
-		*unlock = false;
-		return true;
-
-	case MUTEX_TRYLOCK_FAILED:
-		*unlock = false;
-		if (flags & I915_SHRINK_ACTIVE &&
-		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
-			*unlock = true;
-		return *unlock;
-
-	case MUTEX_TRYLOCK_SUCCESS:
-		*unlock = true;
-		return true;
-	}
-
-	BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
-	if (!unlock)
-		return;
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static bool swap_available(void)
 {
 	return get_nr_swap_pages() > 0;
@@ -155,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	intel_wakeref_t wakeref = 0;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
-	bool unlock;
-
-	if (!shrinker_lock(i915, shrink, &unlock))
-		return 0;
 
 	/*
 	 * When shrinking the active list, we should also consider active
@@ -268,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	if (shrink & I915_SHRINK_BOUND)
 		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
-	shrinker_unlock(i915, unlock);
-
 	if (nr_scanned)
 		*nr_scanned += scanned;
 	return count;
@@ -339,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	unsigned long freed;
-	bool unlock;
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, 0, &unlock))
-		return SHRINK_STOP;
-
 	freed = i915_gem_shrink(i915,
 				sc->nr_to_scan,
 				&sc->nr_scanned,
 				I915_SHRINK_BOUND |
-				I915_SHRINK_UNBOUND |
-				I915_SHRINK_WRITEBACK);
+				I915_SHRINK_UNBOUND);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_wakeref_t wakeref;
 
@@ -366,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 		}
 	}
 
-	shrinker_unlock(i915, unlock);
-
 	return sc->nr_scanned ? freed : SHRINK_STOP;
 }
 
@@ -384,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	freed_pages = 0;
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_ACTIVE |
 					       I915_SHRINK_BOUND |
 					       I915_SHRINK_UNBOUND |
 					       I915_SHRINK_WRITEBACK);
@@ -419,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
 	intel_wakeref_t wakeref;
-	bool unlock;
-
-	if (!shrinker_lock(i915, 0, &unlock))
-		return NOTIFY_DONE;
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
@@ -439,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		if (!vma->iomap || i915_vma_is_active(vma))
 			continue;
 
-		mutex_unlock(&i915->ggtt.vm.mutex);
-		if (i915_vma_unbind(vma) == 0)
+		if (__i915_vma_unbind(vma) == 0)
 			freed_pages += count;
-		mutex_lock(&i915->ggtt.vm.mutex);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	shrinker_unlock(i915, unlock);
-
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
 }
@@ -490,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 	fs_reclaim_acquire(GFP_KERNEL);
 
-	/*
-	 * As we invariably rely on the struct_mutex within the shrinker,
-	 * but have a complicated recursion dance, taint all the mutexes used
-	 * within the shrinker with the struct_mutex. For completeness, we
-	 * taint with all subclass of struct_mutex, even though we should
-	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
-	 * deadlocks from using struct_mutex inside @mutex.
-	 */
-	mutex_acquire(&i915->drm.struct_mutex.dep_map,
-		      I915_MM_SHRINKER, 0, _RET_IP_);
-
 	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
 	mutex_release(&mutex->dep_map, 0, _RET_IP_);
 
-	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
-
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (unlock)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index e45eb8721850..fad98a921cde 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -624,8 +624,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
 			 &stolen_offset, &gtt_offset, &size);
 
@@ -677,21 +675,25 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
+	mutex_lock(&ggtt->vm.mutex);
 	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 				   size, gtt_offset, obj->cache_level,
 				   0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		mutex_unlock(&ggtt->vm.mutex);
 		goto err_pages;
 	}
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	GEM_BUG_ON(vma->pages);
 	vma->pages = obj->mm.pages;
+	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+
 	set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 	__i915_vma_set_map_and_fenceable(vma);
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index e5d1ae8d4dba..dc2a83ce44d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -181,22 +181,25 @@ static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
 			      int tiling_mode, unsigned int stride)
 {
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 	struct i915_vma *vma;
-	int ret;
+	int ret = 0;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
 
+	mutex_lock(&ggtt->vm.mutex);
 	for_each_ggtt_vma(vma, obj) {
 		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
 			continue;
 
-		ret = i915_vma_unbind(vma);
+		ret = __i915_vma_unbind(vma);
 		if (ret)
-			return ret;
+			break;
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
 	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
-	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	if ((tiling | stride) == obj->tiling_and_stride)
 		return 0;
@@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	 * whilst executing a fenced command for an untiled object.
 	 */
 
-	err = i915_gem_object_fence_prepare(obj, tiling, stride);
-	if (err)
-		return err;
-
 	i915_gem_object_lock(obj);
 	if (i915_gem_object_is_framebuffer(obj)) {
 		i915_gem_object_unlock(obj);
 		return -EBUSY;
 	}
 
+	err = i915_gem_object_fence_prepare(obj, tiling, stride);
+	if (err) {
+		i915_gem_object_unlock(obj);
+		return err;
+	}
+
 	/* If the memory has unknown (i.e. varying) swizzling, we pin the
 	 * pages to prevent them being swapped out and causing corruption
 	 * due to the change in swizzling.
@@ -368,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	err = mutex_lock_interruptible(&dev->struct_mutex);
-	if (err)
-		goto err;
-
 	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
-	mutex_unlock(&dev->struct_mutex);
 
 	/* We have to maintain this existing ABI... */
 	args->stride = i915_gem_object_get_stride(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 6b3b50f0f6d9..1738a15eb911 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
 	struct interval_tree_node *it;
-	struct mutex *unlock = NULL;
 	unsigned long end;
 	int ret = 0;
 
@@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		}
 		spin_unlock(&mn->lock);
 
-		if (!unlock) {
-			unlock = &mn->mm->i915->drm.struct_mutex;
-
-			switch (mutex_trylock_recursive(unlock)) {
-			default:
-			case MUTEX_TRYLOCK_FAILED:
-				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
-					i915_gem_object_put(obj);
-					return -EINTR;
-				}
-				/* fall through */
-			case MUTEX_TRYLOCK_SUCCESS:
-				break;
-
-			case MUTEX_TRYLOCK_RECURSIVE:
-				unlock = ERR_PTR(-EEXIST);
-				break;
-			}
-		}
-
 		ret = i915_gem_object_unbind(obj,
 					     I915_GEM_OBJECT_UNBIND_ACTIVE);
 		if (ret == 0)
 			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
 		i915_gem_object_put(obj);
 		if (ret)
-			goto unlock;
+			return ret;
 
 		spin_lock(&mn->lock);
 
@@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	}
 	spin_unlock(&mn->lock);
 
-unlock:
-	if (!IS_ERR_OR_NULL(unlock))
-		mutex_unlock(unlock);
-
 	return ret;
 
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index c5cea4379216..98b2a6ccfcc1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -333,7 +333,12 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	unsigned int supported = INTEL_INFO(i915)->page_sizes;
 	struct drm_i915_gem_object *obj = vma->obj;
-	int err = 0;
+	int err;
+
+	/* We have to wait for the async bind to complete before our asserts */
+	err = i915_vma_sync(vma);
+	if (err)
+		return err;
 
 	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
 		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
@@ -1390,7 +1395,7 @@ static int igt_ppgtt_pin_update(void *arg)
 			goto out_unpin;
 		}
 
-		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
 		if (err)
 			goto out_unpin;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 0f4d0644a480..8eba0d3a31de 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -971,10 +971,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
-	i915_vma_put(batch);
-
+	i915_vma_unpin_and_release(&batch, 0);
 	i915_vma_unpin(vma);
 
 	*rq_out = i915_request_get(rq);
@@ -988,8 +985,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 err_request:
 	i915_request_add(rq);
 err_batch:
-	i915_vma_unpin(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 err_vma:
 	i915_vma_unpin(vma);
 
@@ -1533,9 +1529,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto skip_request;
 
-	i915_vma_unpin(vma);
-	i915_vma_close(vma);
-	i915_vma_put(vma);
+	i915_vma_unpin_and_release(&vma, 0);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index aefe557527f8..36aca1c172e7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -322,7 +322,6 @@ static int igt_partial_tiling(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (1) {
@@ -415,7 +414,6 @@ next_tiling: ;
 
 out_unlock:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
@@ -458,7 +456,6 @@ static int igt_smoke_tiling(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	count = 0;
@@ -508,7 +505,6 @@ static int igt_smoke_tiling(void *arg)
 	pr_info("%s: Completed %lu trials\n", __func__, count);
 
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index ee5dc13a30b3..6718da20f35d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -154,9 +154,7 @@ int igt_gpu_fill_dw(struct intel_context *ce,
 
 	i915_request_add(rq);
 
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 
 	return 0;
 
@@ -165,7 +163,6 @@ int igt_gpu_fill_dw(struct intel_context *ce,
 err_request:
 	i915_request_add(rq);
 err_batch:
-	i915_vma_unpin(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 0e5909ee0657..7205595369be 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -302,11 +302,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		struct intel_uncore *uncore = gt->uncore;
+		unsigned long flags;
 
-		spin_lock_irq(&uncore->lock);
+		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
 					     RING_HEAD(RENDER_RING_BASE));
-		spin_unlock_irq(&uncore->lock);
+		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
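
[Aside: the switch to the irqsave variant matters because
spin_unlock_irq() unconditionally re-enables interrupts, which is wrong
if the caller already ran with them disabled. For comparison:

unsigned long flags;

/* Only safe from process context with IRQs known to be enabled: */
spin_lock_irq(&uncore->lock);
spin_unlock_irq(&uncore->lock);		/* always re-enables IRQs */

/* Safe from any context; restores the caller's IRQ state: */
spin_lock_irqsave(&uncore->lock, flags);
spin_unlock_irqrestore(&uncore->lock, flags);
]
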
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 0747b8c9f768..ec32996254c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1338,15 +1338,13 @@ void intel_ring_free(struct kref *ref)
 {
 	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
 
-	i915_vma_close(ring->vma);
 	i915_vma_put(ring->vma);
-
 	kfree(ring);
 }
 
 static void __ring_context_fini(struct intel_context *ce)
 {
-	i915_gem_object_put(ce->state->obj);
+	i915_vma_put(ce->state);
 }
 
 static void ring_context_destroy(struct kref *ref)
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9c0c8441c22a..d3bee9f88008 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1127,15 +1127,14 @@ static int evict_vma(void *data)
 {
 	struct evict_vma *arg = data;
 	struct i915_address_space *vm = arg->vma->vm;
-	struct drm_i915_private *i915 = vm->i915;
 	struct drm_mm_node evict = arg->vma->node;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&vm->mutex);
 	err = i915_gem_evict_for_node(vm, &evict, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 
 	return err;
 }
@@ -1143,39 +1142,33 @@ static int evict_vma(void *data)
 static int evict_fence(void *data)
 {
 	struct evict_vma *arg = data;
-	struct drm_i915_private *i915 = arg->vma->vm->i915;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/* Mark the fence register as dirty to force the mmio update. */
 	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
 	if (err) {
 		pr_err("Invalid Y-tiling settings; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
 	if (err) {
 		pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin_fence(arg->vma);
 	i915_vma_unpin(arg->vma);
 	if (err) {
 		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	i915_vma_unpin_fence(arg->vma);
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return 0;
 }
 
 static int __igt_reset_evict_vma(struct intel_gt *gt,
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 5ff2437b2998..d996bbc7ea59 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 		flags = PIN_MAPPABLE;
 	}
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
 	mmio_hw_access_post(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
 			high_gm ? "high" : "low");
@@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu)
 
 	return 0;
 out_free_aperture:
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	return ret;
 }
 
@@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
 	drm_mm_remove_node(&vgpu->gm.high_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index d5aac6ff803a..0791736a08fd 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -146,6 +146,7 @@ __active_retire(struct i915_active *ref)
 	if (!retire)
 		return;
 
+	GEM_BUG_ON(rcu_access_pointer(ref->excl));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
 		GEM_BUG_ON(i915_active_request_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
@@ -245,6 +246,8 @@ void __i915_active_init(struct drm_i915_private *i915,
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = retire;
+
+	ref->excl = NULL;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
 	init_llist_head(&ref->preallocated_barriers);
@@ -341,6 +344,46 @@ int i915_active_ref(struct i915_active *ref,
 	return err;
 }
 
+static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
+
+	RCU_INIT_POINTER(ref->excl, NULL);
+	dma_fence_put(f);
+
+	active_retire(ref);
+}
+
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+	/* We expect the caller to manage the exclusive timeline ordering */
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	dma_fence_get(f);
+
+	rcu_read_lock();
+	if (rcu_access_pointer(ref->excl)) {
+		struct dma_fence *old;
+
+		old = dma_fence_get_rcu_safe(&ref->excl);
+		if (old) {
+			if (dma_fence_remove_callback(old, &ref->excl_cb))
+				atomic_dec(&ref->count);
+			dma_fence_put(old);
+		}
+	}
+	rcu_read_unlock();
+
+	atomic_inc(&ref->count);
+	rcu_assign_pointer(ref->excl, f);
+
+	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
+		RCU_INIT_POINTER(ref->excl, NULL);
+		atomic_dec(&ref->count);
+		dma_fence_put(f);
+	}
+}
+
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
@@ -399,6 +442,25 @@ void i915_active_ungrab(struct i915_active *ref)
 	__active_ungrab(ref);
 }
 
+static int excl_wait(struct i915_active *ref)
+{
+	struct dma_fence *old;
+	int err = 0;
+
+	if (!rcu_access_pointer(ref->excl))
+		return 0;
+
+	rcu_read_lock();
+	old = dma_fence_get_rcu_safe(&ref->excl);
+	rcu_read_unlock();
+	if (old) {
+		err = dma_fence_wait(old, true);
+		dma_fence_put(old);
+	}
+
+	return err;
+}
+
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
@@ -419,6 +481,10 @@ int i915_active_wait(struct i915_active *ref)
 		return 0;
 	}
 
+	err = excl_wait(ref);
+	if (err)
+		goto out;
+
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
 			err = -EBUSY;
@@ -430,6 +496,7 @@ int i915_active_wait(struct i915_active *ref)
 			break;
 	}
 
+out:
 	__active_retire(ref);
 	if (err)
 		return err;
@@ -454,26 +521,22 @@ int i915_request_await_active_request(struct i915_request *rq,
 
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
-	struct active_node *it, *n;
-	int err;
-
-	if (RB_EMPTY_ROOT(&ref->tree))
-		return 0;
+	int err = 0;
 
-	/* await allocates and so we need to avoid hitting the shrinker */
-	err = i915_active_acquire(ref);
-	if (err)
-		return err;
+	if (rcu_access_pointer(ref->excl)) {
+		struct dma_fence *fence;
 
-	mutex_lock(&ref->mutex);
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		err = i915_request_await_active_request(rq, &it->base);
-		if (err)
-			break;
+		rcu_read_lock();
+		fence = dma_fence_get_rcu_safe(&ref->excl);
+		rcu_read_unlock();
+		if (fence) {
+			err = i915_request_await_dma_fence(rq, fence);
+			dma_fence_put(fence);
+		}
 	}
-	mutex_unlock(&ref->mutex);
 
-	i915_active_release(ref);
+	/* In the future we may choose to await on all fences */
+
 	return err;
 }
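
[Aside: the new exclusive slot is a single RCU-protected dma_fence
pointer that counts against ref->count until its callback fires. The
reader side used by excl_wait() and i915_request_await_active() boils
down to a lock-free snapshot; as a standalone sketch:

/* Snapshot the exclusive fence without any lock; the slot may be
 * cleared concurrently by excl_cb() once the fence signals. */
static struct dma_fence *snapshot_excl(struct i915_active *ref)
{
	struct dma_fence *f;

	rcu_read_lock();
	f = dma_fence_get_rcu_safe(&ref->excl); /* NULL if none/cleared */
	rcu_read_unlock();

	return f; /* caller must dma_fence_put() when done */
}
]
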
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 949c6835335b..90034f61b7c2 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -379,6 +379,13 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	return i915_active_ref(ref, i915_request_timeline(rq), rq);
 }
 
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+
+static inline bool i915_active_has_exclusive(struct i915_active *ref)
+{
+	return rcu_access_pointer(ref->excl);
+}
+
 int i915_active_wait(struct i915_active *ref);
 
 int i915_request_await_active(struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 1854e7d168c1..86e7a232ea3c 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -8,6 +8,7 @@
 #define _I915_ACTIVE_TYPES_H_
 
 #include <linux/atomic.h>
+#include <linux/dma-fence.h>
 #include <linux/llist.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
@@ -51,6 +52,10 @@ struct i915_active {
 	struct mutex mutex;
 	atomic_t count;
 
+	/* Preallocated "exclusive" node */
+	struct dma_fence __rcu *excl;
+	struct dma_fence_cb excl_cb;
+
 	unsigned long flags;
 #define I915_ACTIVE_GRAB_BIT 0
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3306c6bb515a..5323e4fa55d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1858,10 +1858,8 @@ static int i915_drm_resume(struct drm_device *dev)
 	if (ret)
 		DRM_ERROR("failed to re-enable GGTT\n");
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_restore_gtt_mappings(dev_priv);
 	i915_gem_restore_fences(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_csr_ucode_resume(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7046067f70c1..f50058cf8ab8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -62,20 +62,31 @@
 #include "intel_pm.h"
 
 static int
-insert_mappable_node(struct i915_ggtt *ggtt,
-                     struct drm_mm_node *node, u32 size)
+insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
 {
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->vm.mutex);
+	if (err)
+		return err;
+
 	memset(node, 0, sizeof(*node));
-	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
-					   size, 0, I915_COLOR_UNEVICTABLE,
-					   0, ggtt->mappable_end,
-					   DRM_MM_INSERT_LOW);
+	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
+					  size, 0, I915_COLOR_UNEVICTABLE,
+					  0, ggtt->mappable_end,
+					  DRM_MM_INSERT_LOW);
+
+	mutex_unlock(&ggtt->vm.mutex);
+
+	return err;
 }
 
 static void
-remove_mappable_node(struct drm_mm_node *node)
+remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
 {
+	mutex_lock(&ggtt->vm.mutex);
 	drm_mm_remove_node(node);
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 int
@@ -87,7 +98,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	struct i915_vma *vma;
 	u64 pinned;
 
-	mutex_lock(&ggtt->vm.mutex);
+	if (mutex_lock_interruptible(&ggtt->vm.mutex))
+		return -EINTR;
 
 	pinned = ggtt->vm.reserved;
 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
@@ -109,20 +121,24 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 	LIST_HEAD(still_in_list);
 	int ret = 0;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
 						       obj_link))) {
+		struct i915_address_space *vm = vma->vm;
+
+		ret = -EBUSY;
+		if (!i915_vm_tryopen(vm))
+			break;
+
 		list_move_tail(&vma->obj_link, &still_in_list);
 		spin_unlock(&obj->vma.lock);
 
-		ret = -EBUSY;
 		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
 		    !i915_vma_is_active(vma))
 			ret = i915_vma_unbind(vma);
 
+		i915_vm_close(vm);
 		spin_lock(&obj->vma.lock);
 	}
 	list_splice(&still_in_list, &obj->vma.list);
@@ -338,10 +354,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	u64 remain, offset;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	vma = ERR_PTR(-ENODEV);
 	if (!i915_gem_object_is_tiled(obj))
@@ -355,12 +367,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
-			goto out_unlock;
+			goto out_rpm;
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -414,17 +424,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
 	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
-out_unlock:
+out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return ret;
 }
 
@@ -531,10 +538,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	void __user *user_data;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	if (i915_gem_object_has_struct_page(obj)) {
 		/*
 		 * Avoid waking the device up if we can fallback, as
@@ -544,10 +547,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * using the cache bypass of indirect GGTT access.
 		 */
 		wakeref = intel_runtime_pm_get_if_in_use(rpm);
-		if (!wakeref) {
-			ret = -EFAULT;
-			goto out_unlock;
-		}
+		if (!wakeref)
+			return -EFAULT;
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
 		wakeref = intel_runtime_pm_get(rpm);
@@ -569,8 +570,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -634,18 +633,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
 	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
 out_rpm:
 	intel_runtime_pm_put(rpm, wakeref);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
 }
 
@@ -968,8 +964,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (i915_gem_object_never_bind_ggtt(obj))
 		return ERR_PTR(-ENODEV);
 
@@ -1019,13 +1013,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 				return ERR_PTR(-ENOSPC);
 		}
 
-		WARN(i915_vma_is_pinned(vma),
-		     "bo is already pinned in ggtt with incorrect alignment:"
-		     " offset=%08x, req.alignment=%llx,"
-		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
-		     i915_ggtt_offset(vma), alignment,
-		     !!(flags & PIN_MAPPABLE),
-		     i915_vma_is_map_and_fenceable(vma));
 		ret = i915_vma_unbind(vma);
 		if (ret)
 			return ERR_PTR(ret);
@@ -1444,8 +1431,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (ret == -EIO) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
-
 		/*
 		 * Allow engines or uC initialisation to fail by marking the GPU
 		 * as wedged. But we only want to do this when the GPU is angry,
@@ -1462,8 +1447,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		i915_gem_restore_gtt_mappings(dev_priv);
 		i915_gem_restore_fences(dev_priv);
 		intel_init_clock_gating(dev_priv);
-
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
 	i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 8c1e04f402bc..0552bf93eea3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -47,8 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * bound by their active reference.
 	 */
 	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE |
-				      I915_WAIT_LOCKED,
+				      I915_WAIT_INTERRUPTIBLE,
 				      MAX_SCHEDULE_TIMEOUT);
 }
 
@@ -104,7 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct i915_vma *active;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
@@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
-	/*
-	 * Retire before we search the active list. Although we have
-	 * reasonable accuracy in our retirement lists, we may have
-	 * a stray pin (preventing eviction) that can only be resolved by
-	 * retiring.
-	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(dev_priv);
-
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -235,12 +225,12 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 
 	while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
 		vma = container_of(node, struct i915_vma, node);
-		ret = i915_vma_unbind(vma);
+		ret = __i915_vma_unbind(vma);
 	}
 
 	return ret;
@@ -268,7 +258,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	struct i915_vma *vma, *next;
 	int ret = 0;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
@@ -349,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 
 	return ret;
@@ -373,7 +363,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	struct i915_vma *vma, *next;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict_vm(vm);
 
 	/* Switch back to the default context in order to unpin
@@ -388,7 +378,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
-	mutex_lock(&vm->mutex);
 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -396,13 +385,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 		__i915_vma_pin(vma);
 		list_add(&vma->evict_link, &eviction_list);
 	}
-	mutex_unlock(&vm->mutex);
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 	return ret;
 }
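
[Aside: eviction now runs entirely under vm->mutex and therefore calls
__i915_vma_unbind(), the already-locked variant. The assumed pairing
elsewhere in this series (a sketch, not the literal implementation):

/* Plain variant: take the vm mutex, then defer to the locked one. */
int i915_vma_unbind(struct i915_vma *vma)
{
	int err;

	err = mutex_lock_interruptible(&vma->vm->mutex);
	if (err)
		return err;

	err = __i915_vma_unbind(vma); /* asserts vm->mutex is held */
	mutex_unlock(&vma->vm->mutex);

	return err;
}
]
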
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 615a9f4ef30c..487b7261f7ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -230,14 +230,15 @@ static int fence_update(struct i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
 			return -EINVAL;
 
-		ret = i915_active_wait(&vma->active);
+		ret = i915_vma_sync(vma);
 		if (ret)
 			return ret;
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
-		ret = i915_active_wait(&old->active);
+		/* XXX Ideally we would move the waiting to outside the mutex */
+		ret = i915_vma_sync(old);
 		if (ret) {
 			fence->vma = old;
 			return ret;
@@ -331,13 +332,15 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
 	return ERR_PTR(-EDEADLK);
 }
 
-static int __i915_vma_pin_fence(struct i915_vma *vma)
+int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
+	lockdep_assert_held(&vma->vm->mutex);
+
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
 		fence = vma->fence;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8eba63ecdb03..55cebf256d03 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -150,16 +150,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static int ppgtt_bind_vma(struct i915_vma *vma,
 			  enum i915_cache_level cache_level,
-			  u32 unused)
+			  u32 flags)
 {
 	u32 pte_flags;
 	int err;
 
-	if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+	if (flags & I915_VMA_ALLOC) {
 		err = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start, vma->size);
 		if (err)
 			return err;
+
+		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 	}
 
 	/* Applicable to VLV, and gen8+ */
@@ -167,6 +169,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
+	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 	wmb();
 
@@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
 static int ppgtt_set_pages(struct i915_vma *vma)
@@ -503,15 +507,26 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 	mutex_destroy(&vm->mutex);
 }
 
-static void ppgtt_destroy_vma(struct i915_address_space *vm)
+void __i915_vm_close(struct i915_address_space *vm)
 {
 	struct i915_vma *vma, *vn;
 
-	mutex_lock(&vm->i915->drm.struct_mutex);
-	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
+	mutex_lock(&vm->mutex);
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
 		i915_vma_destroy(vma);
+
+		i915_gem_object_put(obj);
+	}
 	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	mutex_unlock(&vm->i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 }
 
 static void __i915_vm_release(struct work_struct *work)
@@ -519,8 +534,6 @@ static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, rcu.work);
 
-	ppgtt_destroy_vma(vm);
-
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -535,7 +548,6 @@ void i915_vm_release(struct kref *kref)
 	GEM_BUG_ON(i915_is_ggtt(vm));
 	trace_i915_ppgtt_release(vm);
 
-	vm->closed = true;
 	queue_rcu_work(vm->i915->wq, &vm->rcu);
 }
 
@@ -543,6 +555,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 {
 	kref_init(&vm->ref);
 	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+	atomic_set(&vm->open, 1);
 
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -1771,12 +1784,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct drm_i915_private *i915 = vm->i915;
 
-	/* FIXME remove the struct_mutex to bring the locking under control */
-	mutex_lock(&i915->drm.struct_mutex);
 	i915_vma_destroy(ppgtt->vma);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	gen6_ppgtt_free_pd(ppgtt);
 	free_scratch(vm);
@@ -1865,7 +1874,8 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 
 	i915_active_init(i915, &vma->active, NULL, NULL);
 
-	vma->vm = &ggtt->vm;
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(&ggtt->vm);
 	vma->ops = &pd_vma_ops;
 	vma->private = ppgtt;
 
@@ -1885,7 +1895,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err = 0;
 
-	GEM_BUG_ON(ppgtt->base.vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
 
 	/*
 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
@@ -2463,14 +2473,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
 
-		if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+		if (flags & I915_VMA_ALLOC) {
 			ret = alias->vm.allocate_va_range(&alias->vm,
 							  vma->node.start,
 							  vma->size);
 			if (ret)
 				return ret;
+
+			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 		}
 
+		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+				     __i915_vma_flags(vma)));
 		alias->vm.insert_entries(&alias->vm, vma,
 					 cache_level, pte_flags);
 	}
@@ -2499,7 +2513,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 			vm->clear_range(vm, vma->node.start, vma->size);
 	}
 
-	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
 		struct i915_address_space *vm =
 			&i915_vm_to_ggtt(vma->vm)->alias->vm;
 
@@ -2602,22 +2616,16 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 
 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_ppgtt *ppgtt;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ppgtt = fetch_and_zero(&ggtt->alias);
 	if (!ppgtt)
-		goto out;
+		return;
 
 	i915_vm_put(&ppgtt->vm);
 
 	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
@@ -2734,32 +2742,28 @@ int i915_init_ggtt(struct drm_i915_private *i915)
 
 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_vma *vma, *vn;
 
-	ggtt->vm.closed = true;
+	atomic_set(&ggtt->vm.open, 0);
 
 	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
-	flush_workqueue(i915->wq);
+	flush_workqueue(ggtt->vm.i915->wq);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&ggtt->vm.mutex);
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
-		WARN_ON(i915_vma_unbind(vma));
+		WARN_ON(__i915_vma_unbind(vma));
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
 
 	ggtt_release_guc_top(ggtt);
-
-	if (drm_mm_initialized(&ggtt->vm.mm)) {
-		intel_vgt_deballoon(ggtt);
-		i915_address_space_fini(&ggtt->vm);
-	}
+	intel_vgt_deballoon(ggtt);
 
 	ggtt->vm.cleanup(&ggtt->vm);
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&ggtt->vm.mutex);
+	i915_address_space_fini(&ggtt->vm);
 
 	arch_phys_wc_del(ggtt->mtrr);
 	io_mapping_fini(&ggtt->iomap);
@@ -3188,9 +3192,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 static int ggtt_init_hw(struct i915_ggtt *ggtt)
 {
 	struct drm_i915_private *i915 = ggtt->vm.i915;
-	int ret = 0;
-
-	mutex_lock(&i915->drm.struct_mutex);
 
 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 
@@ -3206,18 +3207,14 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
 				ggtt->gmadr.start,
 				ggtt->mappable_end)) {
 		ggtt->vm.cleanup(&ggtt->vm);
-		ret = -EIO;
-		goto out;
+		return -EIO;
 	}
 
 	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
 
 	i915_ggtt_init_fences(ggtt);
 
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
+	return 0;
 }
 
 /**
@@ -3289,6 +3286,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 {
 	struct i915_vma *vma, *vn;
 	bool flush = false;
+	int open;
 
 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
 
@@ -3296,7 +3294,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 
 	/* First fill our portion of the GTT with scratch pages */
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
+
+	/* Skip rewriting PTE on VMA unbind. */
+	open = atomic_xchg(&ggtt->vm.open, 0);
 
 	/* clflush objects bound into the GGTT and rebind them. */
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
@@ -3305,24 +3305,20 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 			continue;
 
-		mutex_unlock(&ggtt->vm.mutex);
-
-		if (!i915_vma_unbind(vma))
-			goto lock;
+		if (!__i915_vma_unbind(vma))
+			continue;
 
+		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
-				      PIN_UPDATE));
+				      PIN_GLOBAL, NULL));
 		if (obj) { /* only used during resume => exclusive access */
 			flush |= fetch_and_zero(&obj->write_domain);
 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
 		}
-
-lock:
-		mutex_lock(&ggtt->vm.mutex);
 	}
 
-	ggtt->vm.closed = false;
+	atomic_set(&ggtt->vm.open, open);
 	ggtt->invalidate(ggtt);
 
 	mutex_unlock(&ggtt->vm.mutex);
@@ -3714,7 +3710,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	u64 offset;
 	int err;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
+
 	GEM_BUG_ON(!size);
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3502b9c85a8e..0a18fdfe63ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -307,7 +307,14 @@ struct i915_address_space {
 
 	unsigned int bind_async_flags;
 
-	bool closed;
+	/*
+	 * Each active user context has its own address space (in full-ppgtt).
+	 * Since the vm may be shared between multiple contexts, we count how
+	 * many contexts keep us "open". Once open hits zero, we are closed
+	 * and do not allow any new attachments, and proceed to shut down
+	 * our vmas and page directories.
+	 */
+	atomic_t open;
 
 	struct mutex mutex; /* protects vma and our lists */
 #define VM_CLASS_GGTT 0
@@ -581,6 +588,35 @@ static inline void i915_vm_put(struct i915_address_space *vm)
 	kref_put(&vm->ref, i915_vm_release);
 }
 
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	atomic_inc(&vm->open);
+	return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+	if (atomic_add_unless(&vm->open, 1, 0))
+		return i915_vm_get(vm);
+
+	return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	if (atomic_dec_and_test(&vm->open))
+		__i915_vm_close(vm);
+
+	i915_vm_put(vm);
+}
+
 int gen6_ppgtt_pin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
@@ -613,10 +649,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
 
-#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT_ULL(11)
+#define PIN_UPDATE		BIT_ULL(9)
+#define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
 
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
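
[Aside: the open count is deliberately separate from the kref: the kref
keeps the struct alive, while open counts users that may still attach
vmas; i915_gem_object_unbind() above uses i915_vm_tryopen() so it never
races the final close. A sketch of a context attaching to and detaching
from a shared vm (the wrapper names are hypothetical):

static struct i915_address_space *
context_attach_vm(struct i915_address_space *vm)
{
	return i915_vm_open(vm);	/* open++ and ref++ */
}

static void context_detach_vm(struct i915_address_space *vm)
{
	/* open--; the last closer tears down bound vmas via
	 * __i915_vm_close() before dropping its reference. */
	i915_vm_close(vm);
}
]
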
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 524f6710b7aa..80055501eccb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1204,15 +1204,10 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 {
 	struct i915_gem_engines_iter it;
-	struct drm_i915_private *i915 = stream->dev_priv;
 	struct i915_gem_context *ctx = stream->ctx;
 	struct intel_context *ce;
 	int err;
 
-	err = i915_mutex_lock_interruptible(&i915->drm);
-	if (err)
-		return ERR_PTR(err);
-
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		if (ce->engine->class != RENDER_CLASS)
 			continue;
@@ -1229,10 +1224,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 	}
 	i915_gem_context_unlock_engines(ctx);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err)
-		return ERR_PTR(err);
-
 	return stream->pinned_ctx;
 }
 
@@ -1331,32 +1322,22 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct intel_context *ce;
 
 	stream->specific_ctx_id = INVALID_CTX_ID;
 	stream->specific_ctx_id_mask = 0;
 
 	ce = fetch_and_zero(&stream->pinned_ctx);
-	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
+	if (ce)
 		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
-	}
 }
 
 static void
 free_oa_buffer(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
 	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
 				   I915_VMA_RELEASE_MAP);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	stream->oa_buffer.vaddr = NULL;
 }
 
@@ -1511,18 +1492,13 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	if (WARN_ON(stream->oa_buffer.vma))
 		return -ENODEV;
 
-	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-	if (ret)
-		return ret;
-
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
 	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
-		ret = PTR_ERR(bo);
-		goto unlock;
+		return PTR_ERR(bo);
 	}
 
 	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
@@ -1546,7 +1522,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 			 i915_ggtt_offset(stream->oa_buffer.vma),
 			 stream->oa_buffer.vaddr);
 
-	goto unlock;
+	return 0;
 
 err_unpin:
 	__i915_vma_unpin(vma);
@@ -1557,8 +1533,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 	stream->oa_buffer.vma = NULL;
 
-unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d097f77890ba..fe91a0e47b88 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -32,6 +32,7 @@
 
 #include "i915_drv.h"
 #include "i915_globals.h"
+#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_vma.h"
 
@@ -110,7 +111,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vma->vm = vm;
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(vm);
 	vma->ops = &vm->vma_ops;
 	vma->obj = obj;
 	vma->resv = obj->base.resv;
@@ -261,8 +263,6 @@ vma_lookup(struct drm_i915_gem_object *obj,
  * Once created, the VMA is kept until either the object is freed, or the
  * address space is closed.
  *
- * Must be called with struct_mutex held.
- *
  * Returns the vma, or an error pointer.
  */
 struct i915_vma *
@@ -273,7 +273,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
-	GEM_BUG_ON(vm->closed);
+	GEM_BUG_ON(!atomic_read(&vm->open));
 
 	spin_lock(&obj->vma.lock);
 	vma = vma_lookup(obj, vm, view);
@@ -287,18 +287,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
+struct i915_vma_work {
+	struct dma_fence_work base;
+	struct i915_vma *vma;
+	enum i915_cache_level cache_level;
+	unsigned int flags;
+};
+
+static int __vma_bind(struct dma_fence_work *work)
+{
+	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
+	struct i915_vma *vma = vw->vma;
+	int err;
+
+	err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags);
+	if (err)
+		atomic_or(I915_VMA_ERROR, &vma->flags);
+
+	if (vma->obj)
+		__i915_gem_object_unpin_pages(vma->obj);
+
+	return err;
+}
+
+static const struct dma_fence_work_ops bind_ops = {
+	.name = "bind",
+	.work = __vma_bind,
+};
+
+struct i915_vma_work *i915_vma_work(void)
+{
+	struct i915_vma_work *vw;
+
+	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
+	if (!vw)
+		return NULL;
+
+	dma_fence_work_init(&vw->base, &bind_ops);
+	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
+
+	return vw;
+}
+
 /**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
  * @vma: VMA to map
  * @cache_level: mapping cache level
  * @flags: flags like global or local mapping
+ * @work: preallocated worker for allocating and binding the PTE
  *
  * DMA addresses are taken from the scatter-gather table of this object (or of
  * this VMA in case of non-default GGTT views) and PTE entries set up.
  * Note that DMA addresses are also the only part of the SG table we care about.
  */
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags)
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -315,11 +360,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (GEM_DEBUG_WARN_ON(!flags))
 		return -EINVAL;
 
-	bind_flags = 0;
-	if (flags & PIN_GLOBAL)
-		bind_flags |= I915_VMA_GLOBAL_BIND;
-	if (flags & PIN_USER)
-		bind_flags |= I915_VMA_LOCAL_BIND;
+	bind_flags = flags;
+	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 
 	vma_flags = atomic_read(&vma->flags);
 	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
@@ -333,9 +375,32 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
-	if (ret)
-		return ret;
+	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+		work->vma = vma;
+		work->cache_level = cache_level;
+		work->flags = bind_flags | I915_VMA_ALLOC;
+
+		/*
+		 * Note we only want to chain up to the migration fence on
+		 * the pages (not the object itself). As we don't track that
+		 * yet, we have to use the exclusive fence instead.
+		 *
+		 * Also note that we do not want to track the async vma as
+		 * part of the obj->resv->excl_fence as it only affects
+		 * execution and not content or object's backing store lifetime.
+		 */
+		GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
+		i915_active_set_exclusive(&vma->active, &work->base.dma);
+		work->base.dma.error = 0; /* enable the queue_work() */
+
+		if (vma->obj)
+			__i915_gem_object_pin_pages(vma->obj);
+	} else {
+		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags);
+		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
+		if (ret)
+			return ret;
+	}
 
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
@@ -348,9 +413,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	/* Access through the GTT requires the device to be awake. */
 	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
-
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
+	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
 		err = -ENODEV;
 		goto err;
 	}
@@ -358,7 +421,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 
-	ptr = vma->iomap;
+	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
 		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
 					vma->node.start,
@@ -368,7 +431,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 			goto err;
 		}
 
-		vma->iomap = ptr;
+		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
+			io_mapping_unmap(ptr);
+			ptr = vma->iomap;
+		}
 	}
 
 	__i915_vma_pin(vma);
@@ -388,18 +454,12 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 void i915_vma_flush_writes(struct i915_vma *vma)
 {
-	if (!i915_vma_has_ggtt_write(vma))
-		return;
-
-	intel_gt_flush_ggtt_writes(vma->vm->gt);
-
-	i915_vma_unset_ggtt_write(vma);
+	if (i915_vma_unset_ggtt_write(vma))
+		intel_gt_flush_ggtt_writes(vma->vm->gt);
 }
 
 void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	GEM_BUG_ON(vma->iomap == NULL);
 
 	i915_vma_flush_writes(vma);
@@ -435,6 +495,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
+	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
+		return true;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -535,7 +598,6 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = vma->vm->i915;
 	unsigned long color;
 	u64 start, end;
 	int ret;
@@ -561,7 +623,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
+		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
@@ -578,34 +640,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	}
 
 	color = 0;
-	if (vma->obj) {
-		ret = i915_gem_object_pin_pages(vma->obj);
-		if (ret)
-			return ret;
-
-		if (i915_vm_has_cache_coloring(vma->vm))
-			color = vma->obj->cache_level;
-	}
-
-	GEM_BUG_ON(vma->pages);
-
-	ret = vma->ops->set_pages(vma);
-	if (ret)
-		goto err_unpin;
+	if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
+		color = vma->obj->cache_level;
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
 		if (!IS_ALIGNED(offset, alignment) ||
-		    range_overflows(offset, size, end)) {
-			ret = -EINVAL;
-			goto err_clear;
-		}
+		    range_overflows(offset, size, end))
+			return -EINVAL;
 
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
 					   size, offset, color,
 					   flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 	} else {
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
@@ -644,7 +692,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 					  size, alignment, color,
 					  start, end, flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
@@ -652,23 +700,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
-	mutex_lock(&vma->vm->mutex);
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
-	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
+		atomic_inc(&vma->obj->mm.pages_pin_count);
 		atomic_inc(&vma->obj->bind_count);
 		assert_bind_count(vma->obj);
 	}
 
 	return 0;
-
-err_clear:
-	vma->ops->clear_pages(vma);
-err_unpin:
-	if (vma->obj)
-		i915_gem_object_unpin_pages(vma->obj);
-	return ret;
 }
 
 static void
@@ -677,12 +717,7 @@ i915_vma_remove(struct i915_vma *vma)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
-	vma->ops->clear_pages(vma);
-
-	mutex_lock(&vma->vm->mutex);
-	drm_mm_remove_node(&vma->node);
 	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
 
 	/*
 	 * Since the unbound list is global, only move to that list if
@@ -701,51 +736,211 @@ i915_vma_remove(struct i915_vma *vma)
 		i915_gem_object_unpin_pages(obj);
 		assert_bind_count(obj);
 	}
+
+	drm_mm_remove_node(&vma->node);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags)
+static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 {
-	const unsigned int bound = atomic_read(&vma->flags);
-	int ret;
+	unsigned int bound;
+	bool pinned = true;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
-	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
+	bound = atomic_read(&vma->flags);
+	do {
+		if (unlikely(flags & ~bound))
+			return false;
 
-	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
-		ret = -EBUSY;
-		goto err_unpin;
+		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+			return false;
+
+		if (!(bound & I915_VMA_PIN_MASK))
+			goto unpinned;
+
+		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
+	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
+
+	return true;
+
+unpinned:
+	/*
+	 * If pin_count==0, but we are bound, check under the lock to avoid
+	 * racing with a concurrent i915_vma_unbind().
+	 */
+	mutex_lock(&vma->vm->mutex);
+	do {
+		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+			pinned = false;
+			break;
+		}
+
+		if (unlikely(flags & ~bound)) {
+			pinned = false;
+			break;
+		}
+	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
+	mutex_unlock(&vma->vm->mutex);
+
+	return pinned;
+}
+
+static int vma_get_pages(struct i915_vma *vma)
+{
+	int err = 0;
+
+	if (atomic_add_unless(&vma->pages_count, 1, 0))
+		return 0;
+
+	/* Allocations ahoy! */
+	if (mutex_lock_interruptible(&vma->pages_mutex))
+		return -EINTR;
+
+	if (!atomic_read(&vma->pages_count)) {
+		if (vma->obj) {
+			err = i915_gem_object_pin_pages(vma->obj);
+			if (err)
+				goto unlock;
+		}
+
+		err = vma->ops->set_pages(vma);
+		if (err)
+			goto unlock;
 	}
+	atomic_inc(&vma->pages_count);
 
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
-		ret = i915_vma_insert(vma, size, alignment, flags);
-		if (ret)
-			goto err_unpin;
+unlock:
+	mutex_unlock(&vma->pages_mutex);
+
+	return err;
+}
+
+static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
+{
+	/* We allocate under vma_get_pages, so beware the shrinker */
+	mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
+	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
+	if (atomic_sub_return(count, &vma->pages_count) == 0) {
+		vma->ops->clear_pages(vma);
+		GEM_BUG_ON(vma->pages);
+		if (vma->obj)
+			i915_gem_object_unpin_pages(vma->obj);
 	}
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	mutex_unlock(&vma->pages_mutex);
+}
 
-	ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags);
-	if (ret)
-		goto err_remove;
+static void vma_put_pages(struct i915_vma *vma)
+{
+	if (atomic_add_unless(&vma->pages_count, -1, 1))
+		return;
+
+	__vma_put_pages(vma, 1);
+}
+
+static void vma_unbind_pages(struct i915_vma *vma)
+{
+	unsigned int count;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	/* The upper portion of pages_count is the number of bindings */
+	count = atomic_read(&vma->pages_count);
+	count >>= I915_VMA_PAGES_BIAS;
+	GEM_BUG_ON(!count);
+
+	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+}
+
+int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+	struct i915_vma_work *work = NULL;
+	unsigned int bound;
+	int err;
+
+	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
+	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
+
+	GEM_BUG_ON(flags & PIN_UPDATE);
+	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
+
+	/* First try and grab the pin without rebinding the vma */
+	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
+		return 0;
+
+	err = vma_get_pages(vma);
+	if (err)
+		return err;
+
+	if (flags & vma->vm->bind_async_flags) {
+		work = i915_vma_work();
+		if (!work) {
+			err = -ENOMEM;
+			goto err_pages;
+		}
+	}
+
+	/* No more allocations allowed once we hold vm->mutex */
+	err = mutex_lock_interruptible(&vma->vm->mutex);
+	if (err)
+		goto err_fence;
+
+	bound = atomic_read(&vma->flags);
+	if (unlikely(bound & I915_VMA_ERROR)) {
+		err = -ENOMEM;
+		goto err_unlock;
+	}
+
+	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
+		err = -EAGAIN; /* pins are meant to be fairly temporary */
+		goto err_unlock;
+	}
+
+	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
+		__i915_vma_pin(vma);
+		goto err_unlock;
+	}
+
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unlock;
+
+	if (!(bound & I915_VMA_BIND_MASK)) {
+		err = i915_vma_insert(vma, size, alignment, flags);
+		if (err)
+			goto err_active;
+
+		if (i915_is_ggtt(vma->vm))
+			__i915_vma_set_map_and_fenceable(vma);
+	}
 
-	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
+	GEM_BUG_ON(!vma->pages);
+	err = i915_vma_bind(vma,
+			    vma->obj ? vma->obj->cache_level : 0,
+			    flags, work);
+	if (err)
+		goto err_remove;
 
-	if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+	/* There should be at most 2 active bindings (user, global) */
+	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
+	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
+	__i915_vma_pin(vma);
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-	return 0;
 
 err_remove:
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
+	if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
 		i915_vma_remove(vma);
-		GEM_BUG_ON(vma->pages);
-		GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK);
-	}
-err_unpin:
-	__i915_vma_unpin(vma);
-	return ret;
+err_active:
+	i915_active_release(&vma->active);
+err_unlock:
+	mutex_unlock(&vma->vm->mutex);
+err_fence:
+	if (work)
+		dma_fence_work_commit(&work->base);
+err_pages:
+	vma_put_pages(vma);
+	return err;
 }
 
 void i915_vma_close(struct i915_vma *vma)
@@ -776,9 +971,6 @@ static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (!i915_vma_is_closed(vma))
-		return;
-
 	spin_lock_irq(&i915->gt.closed_lock);
 	list_del_init(&vma->closed_link);
 	spin_unlock_irq(&i915->gt.closed_lock);
@@ -786,40 +978,35 @@ static void __i915_vma_remove_closed(struct i915_vma *vma)
 
 void i915_vma_reopen(struct i915_vma *vma)
 {
-	__i915_vma_remove_closed(vma);
+	if (i915_vma_is_closed(vma))
+		__i915_vma_remove_closed(vma);
 }
 
-static void __i915_vma_destroy(struct i915_vma *vma)
+void i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(vma->fence);
+	if (drm_mm_node_allocated(&vma->node)) {
+		mutex_lock(&vma->vm->mutex);
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
+		mutex_unlock(&vma->vm->mutex);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+	}
+	GEM_BUG_ON(i915_vma_is_active(vma));
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		spin_lock(&obj->vma.lock);
 		list_del(&vma->obj_link);
-		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
+		rb_erase(&vma->obj_node, &obj->vma.tree);
 		spin_unlock(&obj->vma.lock);
 	}
 
-	i915_active_fini(&vma->active);
-
-	i915_vma_free(vma);
-}
-
-void i915_vma_destroy(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	GEM_BUG_ON(i915_vma_is_pinned(vma));
-
 	__i915_vma_remove_closed(vma);
+	i915_vm_put(vma->vm);
 
-	WARN_ON(i915_vma_unbind(vma));
-	GEM_BUG_ON(i915_vma_is_active(vma));
-
-	__i915_vma_destroy(vma);
+	i915_active_fini(&vma->active);
+	i915_vma_free(vma);
 }
 
 void i915_vma_parked(struct drm_i915_private *i915)
@@ -828,12 +1015,32 @@ void i915_vma_parked(struct drm_i915_private *i915)
 
 	spin_lock_irq(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		list_del_init(&vma->closed_link);
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_address_space *vm = vma->vm;
+
+		/* XXX All to avoid keeping a reference on i915_vma itself */
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		if (!i915_vm_tryopen(vm)) {
+			i915_gem_object_put(obj);
+			obj = NULL;
+		}
+
 		spin_unlock_irq(&i915->gt.closed_lock);
 
-		i915_vma_destroy(vma);
+		if (obj) {
+			i915_vma_destroy(vma);
+			i915_gem_object_put(obj);
+		}
 
+		i915_vm_close(vm);
+
+		/* Restart after dropping lock */
 		spin_lock_irq(&i915->gt.closed_lock);
+		next = list_first_entry(&i915->gt.closed_vma,
+					typeof(*next), closed_link);
 	}
 	spin_unlock_irq(&i915->gt.closed_lock);
 }
@@ -873,6 +1080,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 		list_del(&vma->obj->userfault_link);
 }
 
+int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
+{
+	int err;
+
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+	/* Wait for the vma to be bound before we start! */
+	err = i915_request_await_active(rq, &vma->active);
+	if (err)
+		return err;
+
+	return i915_active_add_request(&vma->active, rq);
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
@@ -880,19 +1101,9 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	int err;
 
-	assert_vma_held(vma);
 	assert_object_held(obj);
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	/*
-	 * Add a reference if we're newly entering the active list.
-	 * The order in which we add operations to the retirement queue is
-	 * vital here: mark_active adds to the start of the callback list,
-	 * such that subsequent callbacks are called first. Therefore we
-	 * add the active reference first and queue for it to be dropped
-	 * *last*.
-	 */
-	err = i915_active_add_request(&vma->active, rq);
+	err = __i915_vma_move_to_active(vma, rq);
 	if (unlikely(err))
 		return err;
 
@@ -918,38 +1129,23 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	return 0;
 }
 
-int i915_vma_unbind(struct i915_vma *vma)
+int __i915_vma_unbind(struct i915_vma *vma)
 {
 	int ret;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	/*
 	 * First wait upon any activity as retiring the request may
 	 * have side-effects such as unpinning or even unbinding this vma.
+	 *
+	 * XXX Actually waiting under the vm->mutex is a hindrance and
+	 * should be pipelined wherever possible. In cases where that is
+	 * unavoidable, we should lift the wait to before the mutex.
 	 */
-	might_sleep();
-	if (i915_vma_is_active(vma)) {
-		/*
-		 * When a closed VMA is retired, it is unbound - eek.
-		 * In order to prevent it from being recursively closed,
-		 * take a pin on the vma so that the second unbind is
-		 * aborted.
-		 *
-		 * Even more scary is that the retire callback may free
-		 * the object (last active vma). To prevent the explosion
-		 * we defer the actual object free to a worker that can
-		 * only proceed once it acquires the struct_mutex (which
-		 * we currently hold, therefore it cannot free this object
-		 * before we are finished).
-		 */
-		__i915_vma_pin(vma);
-		ret = i915_active_wait(&vma->active);
-		__i915_vma_unpin(vma);
-		if (ret)
-			return ret;
-	}
-	GEM_BUG_ON(i915_vma_is_active(vma));
+	ret = i915_vma_sync(vma);
+	if (ret)
+		return ret;
 
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
@@ -970,16 +1166,12 @@ int i915_vma_unbind(struct i915_vma *vma)
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
 		/* release the fence reg _after_ flushing */
-		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_revoke_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
 		/* Force a pagefault for domain tracking on next user access */
-		mutex_lock(&vma->vm->mutex);
 		i915_vma_revoke_mmap(vma);
-		mutex_unlock(&vma->vm->mutex);
 
 		__i915_vma_iounmap(vma);
 		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
@@ -987,17 +1179,33 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
 
-	if (likely(!vma->vm->closed)) {
+	if (likely(atomic_read(&vma->vm->open))) {
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma);
 	}
-	atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags);
 
+	vma_unbind_pages(vma);
 	i915_vma_remove(vma);
 
 	return 0;
 }
 
+int i915_vma_unbind(struct i915_vma *vma)
+{
+	struct i915_address_space *vm = vma->vm;
+	int err;
+
+	err = mutex_lock_interruptible(&vm->mutex);
+	if (err)
+		return err;
+
+	err = __i915_vma_unbind(vma);
+	mutex_unlock(&vm->mutex);
+
+	return err;
+}
+
 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
 {
 	i915_gem_object_make_unshrinkable(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index e49b199f7de7..858908e3d1cc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -96,25 +96,28 @@ struct i915_vma {
 	 * exclusive cachelines of a single page, so a maximum of 64 possible
 	 * users.
 	 */
-#define I915_VMA_PIN_MASK 0xff
-#define I915_VMA_PIN_OVERFLOW_BIT 8
-#define I915_VMA_PIN_OVERFLOW	((int)BIT(I915_VMA_PIN_OVERFLOW_BIT))
+#define I915_VMA_PIN_MASK 0x3ff
+#define I915_VMA_OVERFLOW 0x200
 
 	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND_BIT 9
-#define I915_VMA_LOCAL_BIND_BIT 10
+#define I915_VMA_GLOBAL_BIND_BIT 10
+#define I915_VMA_LOCAL_BIND_BIT  11
 
 #define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
 #define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
 
-#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \
-			    I915_VMA_LOCAL_BIND | \
-			    I915_VMA_PIN_OVERFLOW)
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
 
-#define I915_VMA_GGTT_BIT	11
-#define I915_VMA_CAN_FENCE_BIT	12
-#define I915_VMA_USERFAULT_BIT	13
-#define I915_VMA_GGTT_WRITE_BIT	14
+#define I915_VMA_ALLOC_BIT	12
+#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
+
+#define I915_VMA_ERROR_BIT	13
+#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
+
+#define I915_VMA_GGTT_BIT	14
+#define I915_VMA_CAN_FENCE_BIT	15
+#define I915_VMA_USERFAULT_BIT	16
+#define I915_VMA_GGTT_WRITE_BIT	17
 
 #define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
 #define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
@@ -123,6 +126,11 @@ struct i915_vma {
 
 	struct i915_active active;
 
+#define I915_VMA_PAGES_BIAS 24
+#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
+	atomic_t pages_count; /* number of active binds to the pages */
+	struct mutex pages_mutex; /* protect acquire/release of backing pages */
+
 	/**
 	 * Support different GGTT views into the same object.
 	 * This means there can be multiple VMA mappings per object and per VM.
@@ -169,6 +177,8 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
 	return !i915_active_is_idle(&vma->active);
 }
 
+int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
+					   struct i915_request *rq);
 int __must_check i915_vma_move_to_active(struct i915_vma *vma,
 					 struct i915_request *rq,
 					 unsigned int flags);
@@ -307,13 +317,18 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags);
+struct i915_vma_work *i915_vma_work(void);
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work);
+
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
+int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
@@ -332,26 +347,8 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
 	dma_resv_unlock(vma->resv);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags);
-static inline int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
-	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
-	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
-	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
-
-	/* Pin early to prevent the shrinker/eviction logic from destroying
-	 * our vma as we insert and bind.
-	 */
-	if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) {
-		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-		return 0;
-	}
-
-	return __i915_vma_do_pin(vma, size, alignment, flags);
-}
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
@@ -366,17 +363,17 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
 	atomic_inc(&vma->flags);
-	GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW);
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	atomic_dec(&vma->flags);
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
 {
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	__i915_vma_unpin(vma);
 }
@@ -402,8 +399,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
  * the caller must call i915_vma_unpin_iomap to relinquish the pinning
  * after the iomapping is no longer required.
  *
- * Callers must hold the struct_mutex.
- *
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
@@ -415,8 +410,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
  *
  * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
  *
- * Callers must hold the struct_mutex. This function is only valid to be
- * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
+ * This function is only valid to be called on a VMA previously
+ * iomapped by the caller with i915_vma_pin_iomap().
  */
 void i915_vma_unpin_iomap(struct i915_vma *vma);
 
@@ -444,6 +439,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
 int __must_check i915_vma_pin_fence(struct i915_vma *vma);
 int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
 
+int __i915_vma_pin_fence(struct i915_vma *vma);
+
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0);
@@ -461,7 +458,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
 	if (vma->fence)
 		__i915_vma_unpin_fence(vma);
 }
@@ -490,4 +486,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
+static inline int i915_vma_sync(struct i915_vma *vma)
+{
+	/* Wait for the asynchronous bindings and pending GPU reads */
+	return i915_active_wait(&vma->active);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 37593831b539..0346c3e5b6b6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -119,10 +119,8 @@ static void pm_resume(struct drm_i915_private *i915)
 		intel_gt_sanitize(&i915->gt, false);
 		i915_gem_sanitize(i915);
 
-		mutex_lock(&i915->drm.struct_mutex);
 		i915_gem_restore_gtt_mappings(i915);
 		i915_gem_restore_fences(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		i915_gem_resume(i915);
 	}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 2905fb21d866..75a4695b82bb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915,
 
 static void unpin_ggtt(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
 		if (vma->obj->mm.quirked)
 			i915_vma_unpin(vma);
-	mutex_unlock(&ggtt->vm.mutex);
 }
 
 static void cleanup_objects(struct drm_i915_private *i915,
@@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915,
 		i915_gem_object_put(obj);
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static int igt_evict_something(void *arg)
@@ -148,10 +141,12 @@ static int igt_evict_something(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -161,10 +156,12 @@ static int igt_evict_something(void *arg)
 	unpin_ggtt(i915);
 
 	/* Everything is unpinned, we should be able to evict something */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
 		       err);
@@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg)
 	unpin_ggtt(i915);
 
 	/* Everything is unpinned, we should be able to evict the node */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_for_node returned err=%d\n",
 		       err);
@@ -319,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg)
 	i915_vma_unpin(vma);
 
 	/* Remove just the second vma */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
 		goto cleanup;
@@ -330,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg)
 	 */
 	target.color = I915_CACHE_L3_LLC;
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (!err) {
 		pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
 		err = -EINVAL;
@@ -360,7 +365,9 @@ static int igt_evict_vm(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -369,7 +376,9 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(i915);
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -410,11 +419,11 @@ static int igt_evict_contexts(void *arg)
 	if (!HAS_FULL_PPGTT(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
+	mutex_lock(&i915->ggtt.vm.mutex);
 	err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
 				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				  0, i915->ggtt.vm.total,
@@ -427,7 +436,9 @@ static int igt_evict_contexts(void *arg)
 	do {
 		struct reserved *r;
 
+		mutex_unlock(&i915->ggtt.vm.mutex);
 		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
+		mutex_lock(&i915->ggtt.vm.mutex);
 		if (!r) {
 			err = -ENOMEM;
 			goto out_locked;
@@ -447,7 +458,7 @@ static int igt_evict_contexts(void *arg)
 		count++;
 	} while (1);
 	drm_mm_remove_node(&hole);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);
 
 	/* Overfill the GGTT with context objects and so try to evict one. */
@@ -510,7 +521,7 @@ static int igt_evict_contexts(void *arg)
 			break;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
@@ -524,8 +535,8 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -547,12 +558,9 @@ int i915_gem_evict_mock_selftests(void)
 	if (!i915)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = i915_subtests(tests, i915);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	drm_dev_put(&i915->drm);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 84d94eabc7dd..373fbd58a17a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -38,16 +38,7 @@
 
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
-	/*
-	 * As we may hold onto the struct_mutex for inordinate lengths of
-	 * time, the NMI khungtaskd detector may fire for the free objects
-	 * worker.
-	 */
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static void fake_free_pages(struct drm_i915_gem_object *obj,
@@ -880,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915,
 		i915_vma_unpin(vma);
 		addr += size;
 
+		/*
+		 * Since we are injecting allocation faults at random intervals,
+		 * wait for this allocation to complete before we change the
+		 * fault injection.
+		 */
+		err = i915_vma_sync(vma);
+		if (err)
+			break;
+
 		if (igt_timeout(end_time,
 				"%s timed out at offset %llx [%llx - %llx]\n",
 				__func__, addr, hole_start, hole_end)) {
@@ -1013,21 +1013,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	ppgtt = i915_ppgtt_create(dev_priv);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
-		goto out_unlock;
+		goto out_free;
 	}
 	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
-	GEM_BUG_ON(ppgtt->vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
 	i915_vm_put(&ppgtt->vm);
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+out_free:
 	mock_file_free(dev_priv, file);
 	return err;
 }
@@ -1090,7 +1088,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 	IGT_TIMEOUT(end_time);
 	int err = 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 restart:
 	list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes);
 	drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) {
@@ -1111,7 +1108,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 		last = hole_end;
 		goto restart;
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -1153,13 +1149,9 @@ static int igt_ggtt_page(void *arg)
 	unsigned int *order, n;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
@@ -1227,8 +1219,6 @@ static int igt_ggtt_page(void *arg)
 	i915_gem_object_unpin_pages(obj);
 out_free:
 	i915_gem_object_put(obj);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1239,6 +1229,9 @@ static void track_vma_bind(struct i915_vma *vma)
 	atomic_inc(&obj->bind_count); /* track for eviction later */
 	__i915_gem_object_pin_pages(obj);
 
+	GEM_BUG_ON(vma->pages);
+	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+	__i915_gem_object_pin_pages(obj);
 	vma->pages = obj->mm.pages;
 
 	mutex_lock(&vma->vm->mutex);
@@ -1335,11 +1328,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1385,11 +1380,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1429,11 +1426,13 @@ static int igt_gtt_reserve(void *arg)
 				       2*I915_GTT_PAGE_SIZE,
 				       I915_GTT_MIN_ALIGNMENT);
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   offset,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1502,11 +1501,13 @@ static int igt_gtt_insert(void *arg)
 
 	/* Check a couple of obviously invalid requests */
 	for (ii = invalid_insert; ii->size; ii++) {
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
 					  ii->size, ii->alignment,
 					  I915_COLOR_UNEVICTABLE,
 					  ii->start, ii->end,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err != -ENOSPC) {
 			pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n",
 			       ii->size, ii->alignment, ii->start, ii->end,
@@ -1542,10 +1543,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err == -ENOSPC) {
 			/* maxed out the GGTT space */
 			i915_gem_object_put(obj);
@@ -1600,10 +1603,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1647,10 +1652,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1694,8 +1701,9 @@ int i915_gem_gtt_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
+
+	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 57cd4180d06c..eb175da48547 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -647,8 +647,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
+	/* Force the wait now to avoid including it in the benchmark */
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_pin;
+
 	return vma;
 
+err_pin:
+	i915_vma_unpin(vma);
 err:
 	i915_gem_object_put(obj);
 	return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 97752deecccb..0e4f66312b39 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -831,8 +831,9 @@ int i915_vma_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
+
+	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -879,8 +880,6 @@ static int igt_vma_remapped_gtt(void *arg)
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	for (t = types; *t; t++) {
@@ -976,7 +975,6 @@ static int igt_vma_remapped_gtt(void *arg)
 
 out:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_put(obj);
 
 	return err;
-- 
2.23.0

* [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (4 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
                   ` (28 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

As we need to use a mutex to serialise i915_active activation
(because we want to allow the callback to sleep), we need to push the
i915_active.retire into a worker callback in case we need to retire
from an atomic context.
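
In outline, this is the usual "defer the final release to process
context" trick. A minimal sketch of the idea, using hypothetical names
rather than the i915 API:

#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct tracker {
	atomic_t count;
	struct mutex mutex;		/* serialises retirement */
	struct work_struct work;	/* INIT_WORK(&work, tracker_retire_work) */
};

static void tracker_retire_work(struct work_struct *wrk)
{
	struct tracker *t = container_of(wrk, typeof(*t), work);

	/* Process context: taking the sleeping mutex is allowed here */
	mutex_lock(&t->mutex);
	if (atomic_dec_and_test(&t->count)) {
		/* ... run the retire callback, which may sleep ... */
	}
	mutex_unlock(&t->mutex);
}

static void tracker_put(struct tracker *t)
{
	/* Fast path: not the final reference, nothing to retire */
	if (atomic_add_unless(&t->count, -1, 1))
		return;

	/*
	 * The final reference may be dropped from irq context (fence
	 * signaling), where we must not sleep, so punt the retirement
	 * to a worker instead.
	 */
	queue_work(system_unbound_wq, &t->work);
}

The fast path stays atomic; only the last reference, which may be
dropped from the signaling interrupt, pays for the worker.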

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  |  4 ++-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
 drivers/gpu/drm/i915/i915_active.c            | 34 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_active_types.h      | 13 ++++++-
 drivers/gpu/drm/i915/i915_vma.c               |  2 ++
 drivers/gpu/drm/i915/selftests/i915_active.c  |  6 ++--
 9 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index fc40dc1fdbcc..6428b8dd70d3 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void frontbuffer_retire(struct i915_active *ref)
 {
 	struct intel_frontbuffer *front =
@@ -257,7 +258,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
 	i915_active_init(i915, &front->write,
-			 frontbuffer_active, frontbuffer_retire);
+			 frontbuffer_active,
+			 i915_active_may_sleep(frontbuffer_retire));
 
 	spin_lock(&i915->fb_tracking.lock);
 	if (obj->frontbuffer) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 95f8e66e45db..4cd7d2ecf1d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -848,6 +848,7 @@ struct context_barrier_task {
 	void *data;
 };
 
+__i915_active_call
 static void cb_retire(struct i915_active *base)
 {
 	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 26cb838c272c..06fabdf205cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -138,6 +138,7 @@ static void __context_unpin_state(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
+__i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
@@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active)
 
 	intel_timeline_unpin(ce->timeline);
 	intel_ring_unpin(ce->ring);
+
 	intel_context_put(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 97d36cca8ded..81fab101fdb4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void pool_retire(struct i915_active *ref)
 {
 	struct intel_engine_pool_node *node =
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 9d436e14ea8d..653f60e78392 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 	kfree(cl);
 }
 
+__i915_active_call
 static void __cacheline_retire(struct i915_active *active)
 {
 	struct intel_timeline_cacheline *cl =
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 0791736a08fd..7ca066688b98 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -132,6 +132,7 @@ __active_retire(struct i915_active *ref)
 	bool retire = false;
 
 	lockdep_assert_held(&ref->mutex);
+	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	/* return the unused nodes to our slabcache -- flushing the allocator */
 	if (atomic_dec_and_test(&ref->count)) {
@@ -157,6 +158,19 @@ __active_retire(struct i915_active *ref)
 		ref->retire(ref);
 }
 
+static void
+active_work(struct work_struct *wrk)
+{
+	struct i915_active *ref = container_of(wrk, typeof(*ref), work);
+
+	GEM_BUG_ON(!atomic_read(&ref->count));
+	if (atomic_add_unless(&ref->count, -1, 1))
+		return;
+
+	mutex_lock(&ref->mutex);
+	__active_retire(ref);
+}
+
 static void
 active_retire(struct i915_active *ref)
 {
@@ -164,8 +178,13 @@ active_retire(struct i915_active *ref)
 	if (atomic_add_unless(&ref->count, -1, 1))
 		return;
 
-	/* One active may be flushed from inside the acquire of another */
-	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+	/* If we are inside interrupt context (fence signaling), defer */
+	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
+	    !mutex_trylock(&ref->mutex)) {
+		queue_work(system_unbound_wq, &ref->work);
+		return;
+	}
+
 	__active_retire(ref);
 }
 
@@ -240,12 +259,16 @@ void __i915_active_init(struct drm_i915_private *i915,
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
 {
+	unsigned long bits;
+
 	debug_active_init(ref);
 
 	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
-	ref->retire = retire;
+	ref->retire = ptr_unpack_bits(retire, &bits, 2);
+	if (bits & I915_ACTIVE_MAY_SLEEP)
+		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
 
 	ref->excl = NULL;
 	ref->tree = RB_ROOT;
@@ -253,6 +276,7 @@ void __i915_active_init(struct drm_i915_private *i915,
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
+	INIT_WORK(&ref->work, active_work);
 }
 
 static bool ____active_del_barrier(struct i915_active *ref,
@@ -504,6 +528,7 @@ int i915_active_wait(struct i915_active *ref)
 	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
 		return -EINTR;
 
+	flush_work(&ref->work);
 	if (!i915_active_is_idle(ref))
 		return -EBUSY;
 
@@ -544,8 +569,9 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 void i915_active_fini(struct i915_active *ref)
 {
 	debug_active_fini(ref);
-	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	GEM_BUG_ON(atomic_read(&ref->count));
+	GEM_BUG_ON(work_pending(&ref->work));
+	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
 }
 #endif
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 86e7a232ea3c..021167f0004d 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -13,6 +13,9 @@
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+#include "i915_utils.h"
 
 struct drm_i915_private;
 struct i915_active_request;
@@ -44,6 +47,11 @@ struct i915_active_request {
 
 struct active_node;
 
+#define I915_ACTIVE_MAY_SLEEP BIT(0)
+
+#define __i915_active_call __aligned(4)
+#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
+
 struct i915_active {
 	struct drm_i915_private *i915;
 
@@ -57,11 +65,14 @@ struct i915_active {
 	struct dma_fence_cb excl_cb;
 
 	unsigned long flags;
-#define I915_ACTIVE_GRAB_BIT 0
+#define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
+#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
 
+	struct work_struct work;
+
 	struct llist_head preallocated_barriers;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fe91a0e47b88..e191247c7c5f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -91,6 +91,7 @@ static int __i915_vma_active(struct i915_active *ref)
 	return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
 }
 
+__i915_active_call
 static void __i915_vma_retire(struct i915_active *ref)
 {
 	i915_vma_put(active_to_vma(ref));
@@ -1152,6 +1153,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
 		return -EBUSY;
 	}
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index afecfa081ff4..a41785822ed9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -121,7 +121,7 @@ __live_active_setup(struct drm_i915_private *i915)
 	}
 
 	i915_active_release(&active->base);
-	if (active->retired && count) {
+	if (READ_ONCE(active->retired) && count) {
 		pr_err("i915_active retired before submission!\n");
 		err = -EINVAL;
 	}
@@ -161,7 +161,7 @@ static int live_active_wait(void *arg)
 	}
 
 	i915_active_wait(&active->base);
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after waiting!\n");
 		err = -EINVAL;
 	}
@@ -200,7 +200,7 @@ static int live_active_retire(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
 		err = -EINVAL;
 	}
-- 
2.23.0

* [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (5 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:49   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
                   ` (27 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Forgo the struct_mutex serialisation for i915_active, and interpose its
own mutex handling for active/retire.

This is a multi-layered sleight-of-hand. First, we had to ensure that no
active/retire callbacks accidentally inverted the mutex ordering rules,
nor assumed that they were themselves serialised by struct_mutex. More
challenging, though, is the rule for updating elements of the active
rbtree. Instead of the whole i915_active being serialised by
struct_mutex, allocations/rotations of the tree are serialised by the
i915_active.mutex and individual nodes are serialised by the caller
using the i915_timeline.mutex (we need to use nested spinlocks to
interact with the dma_fence callback lists).
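
As a sketch of the resulting hierarchy (hypothetical names and helper,
not the driver code itself): topology changes to the tree happen only
under the tracker's own mutex, while each node is updated by its caller
under the per-timeline lock, with the fence callback lists beneath both.

#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/types.h>

/*
 * Lock ordering, outermost first:
 *   tracker->mutex    - rbtree allocation/rotation (i915_active.mutex)
 *    timeline->mutex  - updates to an individual node
 *     fence->lock     - dma_fence callback lists (nested spinlocks)
 */
struct tracker {
	struct mutex mutex;	/* guards allocation/rotation of the tree */
	struct rb_root tree;
};

/* Hypothetical helper standing in for the rbtree walk */
struct tracker_node *lookup_or_insert(struct rb_root *tree, u64 idx);

static struct tracker_node *tracker_node(struct tracker *t, u64 idx)
{
	struct tracker_node *node;

	mutex_lock(&t->mutex);		/* tree topology changes only here */
	node = lookup_or_insert(&t->tree, idx);
	mutex_unlock(&t->mutex);

	/* The node itself is then updated under the caller's timeline->mutex */
	return node;
}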

The pain point here is that instead of a single mutex around execbuf, we
now have to take a mutex for each active tracker (one per vma, context,
etc.) and a couple of spinlocks for each fence update. The improvement in
fine-grained locking, allowing multiple concurrent clients
(eventually!), should be worth it in typical loads.

v2: Add some comments that barely elucidate anything :(

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  |   2 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   9 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c         |  10 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |   7 +-
 .../gpu/drm/i915/gt/intel_timeline_types.h    |   9 +-
 drivers/gpu/drm/i915/gt/selftest_context.c    |  16 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  10 +-
 .../gpu/drm/i915/gt/selftests/mock_timeline.c |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   3 -
 drivers/gpu/drm/i915/i915_active.c            | 322 +++++++++---------
 drivers/gpu/drm/i915/i915_active.h            | 319 ++++-------------
 drivers/gpu/drm/i915/i915_active_types.h      |  23 +-
 drivers/gpu/drm/i915/i915_gem.c               |  42 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   4 +-
 drivers/gpu/drm/i915/i915_request.c           |  38 +--
 drivers/gpu/drm/i915/i915_request.h           |   1 -
 drivers/gpu/drm/i915/i915_vma.c               |   4 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |  32 +-
 24 files changed, 298 insertions(+), 572 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 6428b8dd70d3..84b164f31895 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	front->obj = obj;
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
-	i915_active_init(i915, &front->write,
+	i915_active_init(&front->write,
 			 frontbuffer_active,
 			 i915_active_may_sleep(frontbuffer_retire));
 
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 3f4ac1ee7668..e12e1a753af0 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	i915_active_init(dev_priv,
-			 &overlay->last_flip,
+	i915_active_init(&overlay->last_flip,
 			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4cd7d2ecf1d5..9d85aab68d34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 				void (*task)(void *data),
 				void *data)
 {
-	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err = 0;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!task);
 
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb)
 		return -ENOMEM;
 
-	i915_active_init(i915, &cb->base, NULL, cb_retire);
+	i915_active_init(&cb->base, NULL, cb_retire);
 	err = i915_active_acquire(&cb->base);
 	if (err) {
 		kfree(cb);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e1aab2fd1cd9..c00b4f077f9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -8,6 +8,7 @@
 #define __I915_GEM_OBJECT_TYPES_H__
 
 #include <drm/drm_gem.h>
+#include <uapi/drm/i915_drm.h>
 
 #include "i915_active.h"
 #include "i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index cca192ecff8b..0a4115c6c275 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
 	struct llist_node *node, *next;
 
 	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct i915_active_request *active =
+		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
-				     typeof(*active), link);
+				     typeof(*cb), node);
 
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, NULL);
+		cb->func(NULL, cb);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 06fabdf205cf..35a40c2820a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_init(ctx->i915, &ce->active,
+	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
 }
 
@@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 			return err;
 
 		/* Queue this switch after current activity by this context. */
-		err = i915_active_request_set(&tl->last_request, rq);
+		err = i915_active_fence_set(&tl->last_request, rq);
 		mutex_unlock(&tl->mutex);
 		if (err)
 			return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 81fab101fdb4..3cdbd5f8b5be 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 		return ERR_PTR(-ENOMEM);
 
 	node->pool = pool;
-	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+	i915_active_init(&node->active, pool_active, pool_retire);
 
 	obj = i915_gem_object_create_internal(engine->i915, sz);
 	if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index e189897e8797..055496f0825f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 */
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
@@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
 		 * in the worst case.
 		 */
-		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
-		i915_request_put(rq);
+		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+		dma_fence_put(fence);
 
 		/* Restart iteration after dropping lock */
 		spin_lock_irqsave(&timelines->lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 653f60e78392..0f959694303c 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
-	i915_active_init(hwsp->gt->i915, &cl->active,
-			 __cacheline_active, __cacheline_retire);
+	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
 
 	return cl;
 }
@@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
 	if (err)
 		goto err_cacheline;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index c668c4c50e75..98d9ee166379 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -58,12 +58,13 @@ struct intel_timeline {
 	 */
 	struct list_head requests;
 
-	/* Contains an RCU guarded pointer to the last request. No reference is
+	/*
+	 * Contains an RCU guarded pointer to the last request. No reference is
 	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_active_request_get_request_rcu(), or hold the
-	 * struct_mutex.
+	 * the request using i915_active_fence_get(), or manage the RCU
+	 * protection themselves (cf the i915_active_fence API).
 	 */
-	struct i915_active_request last_request;
+	struct i915_active_fence last_request;
 
 	/**
 	 * We track the most recent seqno that we wait on in every context so
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 4ce1e25433d2..e6bcbe7ab5e1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce)
 
 	mutex_lock(&tl->mutex);
 	do {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 		long timeout;
 
-		rcu_read_lock();
-		rq = rcu_dereference(tl->last_request.request);
-		if (rq)
-			rq = i915_request_get_rcu(rq);
-		rcu_read_unlock();
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			break;
 
-		timeout = i915_request_wait(rq, 0, HZ / 10);
+		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
 		if (timeout < 0)
 			err = timeout;
 		else
-			i915_request_retire_upto(rq);
+			i915_request_retire_upto(to_request(fence));
 
-		i915_request_put(rq);
+		dma_fence_put(fence);
 	} while (!err);
 	mutex_unlock(&tl->mutex);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 2da4fa5149cb..01d5a26b9aa7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1176,9 +1176,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	if (!rq)
 		return NULL;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	rq->engine = engine;
 
+	spin_lock_init(&rq->lock);
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+	rq->fence.lock = &rq->lock;
+	rq->fence.ops = &i915_fence_ops;
+
 	i915_sched_node_init(&rq->sched);
 
 	/* mark this request as permanently incomplete */
@@ -1271,8 +1275,8 @@ static int live_suppress_wait_preempt(void *arg)
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
-							  dummy);
+				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+							&dummy->fence);
 				i915_request_add(rq[i]);
 			}
 
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..2a77c051f36a 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6c79d16b381e..03f567084548 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct i915_request *rq;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (workload->req)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7ca066688b98..023652ded4be 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -12,8 +12,6 @@
 #include "i915_active.h"
 #include "i915_globals.h"
 
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
-
 /*
  * Active refs memory management
  *
@@ -27,35 +25,35 @@ static struct i915_global_active {
 } global;
 
 struct active_node {
-	struct i915_active_request base;
+	struct i915_active_fence base;
 	struct i915_active *ref;
 	struct rb_node node;
 	u64 timeline;
 };
 
 static inline struct active_node *
-node_from_active(struct i915_active_request *active)
+node_from_active(struct i915_active_fence *active)
 {
 	return container_of(active, struct active_node, base);
 }
 
 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
 
-static inline bool is_barrier(const struct i915_active_request *active)
+static inline bool is_barrier(const struct i915_active_fence *active)
 {
-	return IS_ERR(rcu_access_pointer(active->request));
+	return IS_ERR(rcu_access_pointer(active->fence));
 }
 
 static inline struct llist_node *barrier_to_ll(struct active_node *node)
 {
 	GEM_BUG_ON(!is_barrier(&node->base));
-	return (struct llist_node *)&node->base.link;
+	return (struct llist_node *)&node->base.cb.node;
 }
 
 static inline struct intel_engine_cs *
 __barrier_to_engine(struct active_node *node)
 {
-	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
+	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
 }
 
 static inline struct intel_engine_cs *
@@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node)
 static inline struct active_node *barrier_from_ll(struct llist_node *x)
 {
 	return container_of((struct list_head *)x,
-			    struct active_node, base.link);
+			    struct active_node, base.cb.node);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
@@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref)
 	if (!retire)
 		return;
 
-	GEM_BUG_ON(rcu_access_pointer(ref->excl));
+	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
-		GEM_BUG_ON(i915_active_request_isset(&it->base));
+		GEM_BUG_ON(i915_active_fence_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
 	}
 
 	/* After the final retire, the entire struct may be freed */
 	if (ref->retire)
 		ref->retire(ref);
+
+	/* ... except if you wait on it, you must manage your own references! */
+	wake_up_var(ref);
 }
 
 static void
@@ -189,12 +190,20 @@ active_retire(struct i915_active *ref)
 }
 
 static void
-node_retire(struct i915_active_request *base, struct i915_request *rq)
+node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	active_retire(node_from_active(base)->ref);
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static struct i915_active_request *
+static void
+excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct i915_active, excl.cb));
+}
+
+static struct i915_active_fence *
 active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
@@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
+	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -253,8 +262,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	return &node->base;
 }
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
@@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915,
 
 	debug_active_init(ref);
 
-	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = ptr_unpack_bits(retire, &bits, 2);
 	if (bits & I915_ACTIVE_MAY_SLEEP)
 		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
 
-	ref->excl = NULL;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
+	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
 	INIT_WORK(&ref->work, active_work);
 }
 
@@ -329,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq)
+		    struct dma_fence *fence)
 {
-	struct i915_active_request *active;
+	struct i915_active_fence *active;
 	int err;
 
 	lockdep_assert_held(&tl->mutex);
@@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref,
 		 * request that we want to emit on the kernel_context.
 		 */
 		__active_del_barrier(ref, node_from_active(active));
-		RCU_INIT_POINTER(active->request, NULL);
-		INIT_LIST_HEAD(&active->link);
-	} else {
-		if (!i915_active_request_isset(active))
-			atomic_inc(&ref->count);
+		RCU_INIT_POINTER(active->fence, NULL);
+		atomic_dec(&ref->count);
 	}
-	GEM_BUG_ON(!atomic_read(&ref->count));
-	__i915_active_request_set(active, rq);
+	if (!__i915_active_fence_set(active, fence))
+		atomic_inc(&ref->count);
 
 out:
 	i915_active_release(ref);
 	return err;
 }
 
-static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
-{
-	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
-
-	RCU_INIT_POINTER(ref->excl, NULL);
-	dma_fence_put(f);
-
-	active_retire(ref);
-}
-
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
 {
 	/* We expect the caller to manage the exclusive timeline ordering */
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
-	dma_fence_get(f);
-
-	rcu_read_lock();
-	if (rcu_access_pointer(ref->excl)) {
-		struct dma_fence *old;
-
-		old = dma_fence_get_rcu_safe(&ref->excl);
-		if (old) {
-			if (dma_fence_remove_callback(old, &ref->excl_cb))
-				atomic_dec(&ref->count);
-			dma_fence_put(old);
-		}
-	}
-	rcu_read_unlock();
-
-	atomic_inc(&ref->count);
-	rcu_assign_pointer(ref->excl, f);
+	/*
+	 * As we don't know which mutex the caller is using, we told a small
+	 * lie to the debug code that it is using the i915_active.mutex;
+	 * and now we must stick to that lie.
+	 */
+	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
+	if (!__i915_active_fence_set(&ref->excl, f))
+		atomic_inc(&ref->count);
+	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
+}
 
-	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
-		RCU_INIT_POINTER(ref->excl, NULL);
-		atomic_dec(&ref->count);
-		dma_fence_put(f);
-	}
+bool i915_active_acquire_if_busy(struct i915_active *ref)
+{
+	debug_active_assert(ref);
+	return atomic_add_unless(&ref->count, 1, 0);
 }
 
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
 
-	debug_active_assert(ref);
-	if (atomic_add_unless(&ref->count, 1, 0))
+	if (i915_active_acquire_if_busy(ref))
 		return 0;
 
 	err = mutex_lock_interruptible(&ref->mutex);
@@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref)
 	active_retire(ref);
 }
 
-static void __active_ungrab(struct i915_active *ref)
-{
-	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
-}
-
-bool i915_active_trygrab(struct i915_active *ref)
+static void enable_signaling(struct i915_active_fence *active)
 {
-	debug_active_assert(ref);
-
-	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
-		return false;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		__active_ungrab(ref);
-		return false;
-	}
+	struct dma_fence *fence;
 
-	return true;
-}
-
-void i915_active_ungrab(struct i915_active *ref)
-{
-	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
-
-	active_retire(ref);
-	__active_ungrab(ref);
-}
-
-static int excl_wait(struct i915_active *ref)
-{
-	struct dma_fence *old;
-	int err = 0;
-
-	if (!rcu_access_pointer(ref->excl))
-		return 0;
-
-	rcu_read_lock();
-	old = dma_fence_get_rcu_safe(&ref->excl);
-	rcu_read_unlock();
-	if (old) {
-		err = dma_fence_wait(old, true);
-		dma_fence_put(old);
-	}
+	fence = i915_active_fence_get(active);
+	if (!fence)
+		return;
 
-	return err;
+	dma_fence_enable_sw_signaling(fence);
+	dma_fence_put(fence);
 }
 
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int err;
+	int err = 0;
 
 	might_sleep();
-	might_lock(&ref->mutex);
-
-	if (i915_active_is_idle(ref))
-		return 0;
-
-	err = mutex_lock_interruptible(&ref->mutex);
-	if (err)
-		return err;
 
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		mutex_unlock(&ref->mutex);
+	if (!i915_active_acquire_if_busy(ref))
 		return 0;
-	}
-
-	err = excl_wait(ref);
-	if (err)
-		goto out;
 
+	/* Flush lazy signals */
+	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
-			err = -EBUSY;
-			break;
-		}
+		if (is_barrier(&it->base)) /* unconnected idle barrier */
+			continue;
 
-		err = i915_active_request_retire(&it->base, BKL(ref));
-		if (err)
-			break;
+		enable_signaling(&it->base);
 	}
+	/* Any fence added after the wait begins will not be auto-signaled */
 
-out:
-	__active_retire(ref);
+	i915_active_release(ref);
 	if (err)
 		return err;
 
-	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
+	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
 		return -EINTR;
 
-	flush_work(&ref->work);
-	if (!i915_active_is_idle(ref))
-		return -EBUSY;
-
 	return 0;
 }
 
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active)
-{
-	struct i915_request *barrier =
-		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
-
-	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
 	int err = 0;
 
-	if (rcu_access_pointer(ref->excl)) {
+	if (rcu_access_pointer(ref->excl.fence)) {
 		struct dma_fence *fence;
 
 		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&ref->excl);
+		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
 		rcu_read_unlock();
 		if (fence) {
 			err = i915_request_await_dma_fence(rq, fence);
@@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref)
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
-	return node->timeline == idx && !i915_active_request_isset(&node->base);
+	return node->timeline == idx && !i915_active_fence_isset(&node->base);
 }
 
 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
@@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			node->base.lock =
 				&engine->kernel_context->timeline->mutex;
 #endif
-			RCU_INIT_POINTER(node->base.request, NULL);
-			node->base.retire = node_retire;
+			RCU_INIT_POINTER(node->base.fence, NULL);
+			node->base.cb.func = node_retire;
 			node->timeline = idx;
 			node->ref = ref;
 		}
 
-		if (!i915_active_request_isset(&node->base)) {
+		if (!i915_active_fence_isset(&node->base)) {
 			/*
 			 * Mark this as being *our* unconnected proto-node.
 			 *
@@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 * and then we can use the rb_node and list pointers
 			 * for our tracking of the pending barrier.
 			 */
-			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-			node->base.link.prev = (void *)engine;
+			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
+			node->base.cb.node.prev = (void *)engine;
 			atomic_inc(&ref->count);
 		}
 
@@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct llist_node *node, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	node = llist_del_all(&engine->barrier_tasks);
+	if (!node)
+		return;
 	/*
 	 * Attach the list of proto-fences to the in-flight request such
 	 * that the parent i915_active will be released when this request
 	 * is retired.
 	 */
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
+	spin_lock_irqsave(&rq->lock, flags);
+	llist_for_each_safe(node, next, node) {
+		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
 		smp_wmb(); /* serialise with reuse_idle_barrier */
-		list_add_tail((struct list_head *)node, &rq->active_list);
+		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define active_is_held(active) lockdep_is_held((active)->lock)
+#else
+#define active_is_held(active) true
+#endif
+
+/*
+ * __i915_active_fence_set: Update the last active fence along its timeline
+ * @active: the active tracker
+ * @fence: the new fence (under construction)
+ *
+ * Records the new @fence as the last active fence along its timeline in
+ * this active tracker, moving the tracking callbacks from the previous
+ * fence onto this one. Returns the previous fence (if not already completed),
+ * which the caller must ensure is executed before the new fence. To ensure
+ * that the order of fences within the timeline of the i915_active_fence is
+ * maintained, it must be locked by the caller.
+ */
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence)
+{
+	struct dma_fence *prev;
+	unsigned long flags;
+
+	/* NB: must be serialised by an outer timeline mutex (active->lock) */
+	spin_lock_irqsave(fence->lock, flags);
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+
+	prev = rcu_dereference_protected(active->fence, active_is_held(active));
+	if (prev) {
+		GEM_BUG_ON(prev == fence);
+		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+		__list_del_entry(&active->cb.node);
+		spin_unlock(prev->lock); /* serialise with prev->cb_list */
+
+		/*
+		 * active->fence is reset by the callback from inside
+		 * interrupt context. We need to serialise our list
+		 * manipulation with the fence->lock to prevent the prev
+		 * being lost inside an interrupt (it can't be replaced as
+		 * no other caller is allowed to enter __i915_active_fence_set
+		 * as we hold the timeline lock). After serialising with
+		 * the callback, we need to double check which ran first,
+		 * our list_del() [decoupling prev from the callback] or
+		 * the callback...
+		 */
+		prev = rcu_access_pointer(active->fence);
 	}
+
+	rcu_assign_pointer(active->fence, fence);
+	list_add_tail(&active->cb.node, &fence->cb_list);
+
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return prev;
 }
 
-int i915_active_request_set(struct i915_active_request *active,
-			    struct i915_request *rq)
+int i915_active_fence_set(struct i915_active_fence *active,
+			  struct i915_request *rq)
 {
-	int err;
+	struct dma_fence *fence;
+	int err = 0;
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	lockdep_assert_held(active->lock);
 #endif
 
-	/* Must maintain ordering wrt previous active requests */
-	err = i915_request_await_active_request(rq, active);
-	if (err)
-		return err;
+	/* Must maintain timeline ordering wrt previous active requests */
+	rcu_read_lock();
+	fence = __i915_active_fence_set(active, &rq->fence);
+	if (fence) /* but the previous fence may not belong to that timeline! */
+		fence = dma_fence_get_rcu(fence);
+	rcu_read_unlock();
+	if (fence) {
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
 
-	__i915_active_request_set(active, rq);
-	return 0;
+	return err;
 }
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request)
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	/* Space left intentionally blank */
+	i915_active_fence_cb(fence, cb);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 90034f61b7c2..4f52fe6146d2 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -12,6 +12,10 @@
 #include "i915_active_types.h"
 #include "i915_request.h"
 
+struct i915_request;
+struct intel_engine_cs;
+struct intel_timeline;
+
 /*
  * We treat requests as fences. This is not to be confused with our
  * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
@@ -28,308 +32,108 @@
  * write access so that we can perform concurrent read operations between
  * the CPU and GPU engines, as well as waiting for all rendering to
  * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_active_request to track the most recent (in
+ * object then embeds a #i915_active_fence to track the most recent (in
  * retirement order) request relevant for the desired mode of access.
- * The #i915_active_request is updated with i915_active_request_set() to
+ * The #i915_active_fence is updated with i915_active_fence_set() to
  * track the most recent fence request, typically this is done as part of
  * i915_vma_move_to_active().
  *
- * When the #i915_active_request completes (is retired), it will
+ * When the #i915_active_fence completes (is retired), it will
  * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_active_request.request == NULL). The owner
+ * itself as idle (i915_active_fence.fence == NULL). The owner
  * can then perform any action, such as delayed freeing of an active
  * resource including itself.
  */
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request);
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
 
 /**
- * i915_active_request_init - prepares the activity tracker for use
+ * __i915_active_fence_init - prepares the activity tracker for use
  * @active - the active tracker
- * @rq - initial request to track, can be NULL
+ * @fence - initial fence to track, can be NULL
  * @func - a callback when the tracker is retired (becomes idle),
  *         can be NULL
  *
- * i915_active_request_init() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
+ * __i915_active_fence_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active fence
+ * associated with it. When the last fence becomes idle, when it is retired
  * after completion, the optional callback @func is invoked.
  */
 static inline void
-i915_active_request_init(struct i915_active_request *active,
+__i915_active_fence_init(struct i915_active_fence *active,
 			 struct mutex *lock,
-			 struct i915_request *rq,
-			 i915_active_retire_fn retire)
+			 void *fence,
+			 dma_fence_func_t fn)
 {
-	RCU_INIT_POINTER(active->request, rq);
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_active_retire_noop;
+	RCU_INIT_POINTER(active->fence, fence);
+	active->cb.func = fn ?: i915_active_noop;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	active->lock = lock;
 #endif
 }
 
-#define INIT_ACTIVE_REQUEST(name, lock) \
-	i915_active_request_init((name), (lock), NULL, NULL)
-
-/**
- * i915_active_request_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * __i915_active_request_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-__i915_active_request_set(struct i915_active_request *active,
-			  struct i915_request *request)
-{
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	lockdep_assert_held(active->lock);
-#endif
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
+#define INIT_ACTIVE_FENCE(A, LOCK) \
+	__i915_active_fence_init((A), (LOCK), NULL, NULL)
 
-int __must_check
-i915_active_request_set(struct i915_active_request *active,
-			struct i915_request *rq);
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence);
 
 /**
- * i915_active_request_raw - return the active request
+ * i915_active_fence_set - updates the tracker to watch the current fence
  * @active - the active tracker
+ * @rq - the request to watch
  *
- * i915_active_request_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
+ * i915_active_fence_set() watches the given @rq for completion. While
+ * that @rq is busy, the @active reports busy. When that @rq is signaled
+ * (or else retired), the @active tracker is updated to report idle.
  */
-static inline struct i915_request *
-i915_active_request_raw(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return rcu_dereference_protected(active->request,
-					 lockdep_is_held(mutex));
-}
-
-/**
- * i915_active_request_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_active_request_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_peek(const struct i915_active_request *active,
-			 struct mutex *mutex)
-{
-	struct i915_request *request;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request || i915_request_completed(request))
-		return NULL;
-
-	return request;
-}
-
-/**
- * i915_active_request_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_active_request_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_get(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return i915_request_get(i915_active_request_peek(active, mutex));
-}
-
-/**
- * __i915_active_request_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_active_request_get() returns a reference to the active request,
- * or NULL if the active tracker is idle. The caller must hold the RCU read
- * lock, but the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_active_request_get_rcu(const struct i915_active_request *active)
-{
-	/*
-	 * Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-	 * slab of request objects will not be freed whilst we hold the
-	 * RCU read lock. It does not guarantee that the request itself
-	 * will not be freed and then *reused*. Viz,
-	 *
-	 * Thread A			Thread B
-	 *
-	 * rq = active.request
-	 *				retire(rq) -> free(rq);
-	 *				(rq is now first on the slab freelist)
-	 *				active.request = NULL
-	 *
-	 *				rq = new submission on a new object
-	 * ref(rq)
-	 *
-	 * To prevent the request from being reused whilst the caller
-	 * uses it, we take a reference like normal. Whilst acquiring
-	 * the reference we check that it is not in a destroyed state
-	 * (refcnt == 0). That prevents the request being reallocated
-	 * whilst the caller holds on to it. To check that the request
-	 * was not reallocated as we acquired the reference we have to
-	 * check that our request remains the active request across
-	 * the lookup, in the same manner as a seqlock. The visibility
-	 * of the pointer versus the reference counting is controlled
-	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-	 *
-	 * In the middle of all that, we inspect whether the request is
-	 * complete. Retiring is lazy so the request may be completed long
-	 * before the active tracker is updated. Querying whether the
-	 * request is complete is far cheaper (as it involves no locked
-	 * instructions setting cachelines to exclusive) than acquiring
-	 * the reference, so we do it first. The RCU read lock ensures the
-	 * pointer dereference is valid, but does not ensure that the
-	 * seqno nor HWS is the right one! However, if the request was
-	 * reallocated, that means the active tracker's request was complete.
-	 * If the new request is also complete, then both are and we can
-	 * just report the active tracker is idle. If the new request is
-	 * incomplete, then we acquire a reference on it and check that
-	 * it remained the active request.
-	 *
-	 * It is then imperative that we do not zero the request on
-	 * reallocation, so that we can chase the dangling pointers!
-	 * See i915_request_alloc().
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_request_completed(request))
-			return NULL;
-
-		/*
-		 * An especially silly compiler could decide to recompute the
-		 * result of i915_request_completed, more specifically
-		 * re-emit the load for request->fence.seqno. A race would catch
-		 * a later seqno value, which could flip the result from true to
-		 * false. Which means part of the instructions below might not
-		 * be executed, while later on instructions are executed. Due to
-		 * barriers within the refcounting the inconsistency can't reach
-		 * past the call to i915_request_get_rcu, but not executing
-		 * that while still executing i915_request_put() creates
-		 * havoc enough.  Prevent this with a compiler barrier.
-		 */
-		barrier();
-
-		request = i915_request_get_rcu(request);
-
-		/*
-		 * What stops the following rcu_access_pointer() from occurring
-		 * before the above i915_request_get_rcu()? If we were
-		 * to read the value before pausing to get the reference to
-		 * the request, we may not notice a change in the active
-		 * tracker.
-		 *
-		 * The rcu_access_pointer() is a mere compiler barrier, which
-		 * means both the CPU and compiler are free to perform the
-		 * memory read without constraint. The compiler only has to
-		 * ensure that any operations after the rcu_access_pointer()
-		 * occur afterwards in program order. This means the read may
-		 * be performed earlier by an out-of-order CPU, or adventurous
-		 * compiler.
-		 *
-		 * The atomic operation at the heart of
-		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-		 * atomic_inc_not_zero() which is only a full memory barrier
-		 * when successful. That is, if i915_request_get_rcu()
-		 * returns the request (and so with the reference counted
-		 * incremented) then the following read for rcu_access_pointer()
-		 * must occur after the atomic operation and so confirm
-		 * that this request is the one currently being tracked.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		if (!request || request == rcu_access_pointer(active->request))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
+int __must_check
+i915_active_fence_set(struct i915_active_fence *active,
+		      struct i915_request *rq);
 /**
- * i915_active_request_get_unlocked - return a reference to the active request
+ * i915_active_fence_get - return a reference to the active fence
  * @active - the active tracker
  *
- * i915_active_request_get_unlocked() returns a reference to the active request,
+ * i915_active_fence_get() returns a reference to the active fence,
  * or NULL if the active tracker is idle. The reference is obtained under RCU,
  * so no locking is required by the caller.
  *
- * The reference should be freed with i915_request_put().
+ * The reference should be freed with dma_fence_put().
  */
-static inline struct i915_request *
-i915_active_request_get_unlocked(const struct i915_active_request *active)
+static inline struct dma_fence *
+i915_active_fence_get(struct i915_active_fence *active)
 {
-	struct i915_request *request;
+	struct dma_fence *fence;
 
 	rcu_read_lock();
-	request = __i915_active_request_get_rcu(active);
+	fence = dma_fence_get_rcu_safe(&active->fence);
 	rcu_read_unlock();
 
-	return request;
+	return fence;
 }
 
 /**
- * i915_active_request_isset - report whether the active tracker is assigned
+ * i915_active_fence_isset - report whether the active tracker is assigned
  * @active - the active tracker
  *
- * i915_active_request_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
+ * i915_active_fence_isset() returns true if the active tracker is currently
+ * assigned to a fence. Due to the lazy retiring, that fence may be idle
  * and this may report stale information.
  */
 static inline bool
-i915_active_request_isset(const struct i915_active_request *active)
+i915_active_fence_isset(const struct i915_active_fence *active)
 {
-	return rcu_access_pointer(active->request);
+	return rcu_access_pointer(active->fence);
 }
 
-/**
- * i915_active_request_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_active_request_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_active_request_retire(struct i915_active_request *active,
-			   struct mutex *mutex)
+static inline void
+i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	struct i915_request *request;
-	long ret;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	if (ret < 0)
-		return ret;
+	struct i915_active_fence *active =
+		container_of(cb, typeof(*active), cb);
 
-	list_del_init(&active->link);
-	RCU_INIT_POINTER(active->request, NULL);
-
-	active->retire(active, request);
-
-	return 0;
+	RCU_INIT_POINTER(active->fence, NULL);
 }
 
 /*
@@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active,
  * synchronisation.
  */
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key);
-#define i915_active_init(i915, ref, active, retire) do {		\
+#define i915_active_init(ref, active, retire) do {		\
 	static struct lock_class_key __key;				\
 									\
-	__i915_active_init(i915, ref, active, retire, &__key);		\
+	__i915_active_init(ref, active, retire, &__key);		\
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq);
+		    struct dma_fence *fence);
 
 static inline int
 i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 {
-	return i915_active_ref(ref, i915_request_timeline(rq), rq);
+	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
 }
 
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
 
 static inline bool i915_active_has_exclusive(struct i915_active *ref)
 {
-	return rcu_access_pointer(ref->excl);
+	return rcu_access_pointer(ref->excl.fence);
 }
 
 int i915_active_wait(struct i915_active *ref);
 
-int i915_request_await_active(struct i915_request *rq,
-			      struct i915_active *ref);
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active);
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
 
 int i915_active_acquire(struct i915_active *ref);
+bool i915_active_acquire_if_busy(struct i915_active *ref);
 void i915_active_release(struct i915_active *ref);
-void __i915_active_release_nested(struct i915_active *ref, int subclass);
-
-bool i915_active_trygrab(struct i915_active *ref);
-void i915_active_ungrab(struct i915_active *ref);
 
 static inline bool
 i915_active_is_idle(const struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 021167f0004d..d89a74c142c6 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -17,17 +17,9 @@
 
 #include "i915_utils.h"
 
-struct drm_i915_private;
-struct i915_active_request;
-struct i915_request;
-
-typedef void (*i915_active_retire_fn)(struct i915_active_request *,
-				      struct i915_request *);
-
-struct i915_active_request {
-	struct i915_request __rcu *request;
-	struct list_head link;
-	i915_active_retire_fn retire;
+struct i915_active_fence {
+	struct dma_fence __rcu *fence;
+	struct dma_fence_cb cb;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	/*
 	 * Incorporeal!
@@ -53,20 +45,17 @@ struct active_node;
 #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
 
 struct i915_active {
-	struct drm_i915_private *i915;
+	atomic_t count;
+	struct mutex mutex;
 
 	struct active_node *cache;
 	struct rb_root tree;
-	struct mutex mutex;
-	atomic_t count;
 
 	/* Preallocated "exclusive" node */
-	struct dma_fence __rcu *excl;
-	struct dma_fence_cb excl_cb;
+	struct i915_active_fence excl;
 
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
-#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f50058cf8ab8..64890627d638 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
 
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
-		/*
-		 * "Race-to-idle".
-		 *
-		 * Switching to the kernel context is often used a synchronous
-		 * step prior to idling, e.g. in suspend for flushing all
-		 * current operations to memory before sleeping. These we
-		 * want to complete as quickly as possible to avoid prolonged
-		 * stalls, so allow the gpu to boost to maximum clocks.
-		 */
-		if (wait & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq);
+		if (!dma_fence_is_i915(fence)) {
+			timeout = dma_fence_wait_timeout(fence,
+							 flags & I915_WAIT_INTERRUPTIBLE,
+							 timeout);
+		} else {
+			struct i915_request *rq = to_request(fence);
+
+			/*
+			 * "Race-to-idle".
+			 *
+			 * Switching to the kernel context is often used as
+			 * a synchronous step prior to idling, e.g. in suspend
+			 * for flushing all current operations to memory before
+			 * sleeping. These we want to complete as quickly as
+			 * possible to avoid prolonged stalls, so allow the gpu
+			 * to boost to maximum clocks.
+			 */
+			if (flags & I915_WAIT_FOR_IDLE_BOOST)
+				gen6_rps_boost(rq);
+
+			timeout = i915_request_wait(rq, flags, timeout);
+		}
 
-		timeout = i915_request_wait(rq, wait, timeout);
-		i915_request_put(rq);
+		dma_fence_put(fence);
 		if (timeout < 0)
 			return timeout;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 55cebf256d03..7462d87f7a48 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = {
 
 static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 {
-	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
 	struct i915_vma *vma;
 
@@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	i915_active_init(i915, &vma->active, NULL, NULL);
+	i915_active_init(&vma->active, NULL, NULL);
 
 	mutex_init(&vma->pages_mutex);
 	vma->vm = i915_vm_get(&ggtt->vm);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6384a06aa5bf..a28ee754b7b4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next,
 	if (!c)
 		return next;
 
-	if (!i915_active_trygrab(&vma->active)) {
+	if (!i915_active_acquire_if_busy(&vma->active)) {
 		kfree(c);
 		return next;
 	}
@@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 			*this->slot =
 				i915_error_object_create(i915, vma, compress);
 
-			i915_active_ungrab(&vma->active);
+			i915_active_release(&vma->active);
 			i915_vma_put(vma);
 
 			capture = this->next;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a8916412759b..4ffe62a42186 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq)
 
 static bool i915_request_retire(struct i915_request *rq)
 {
-	struct i915_active_request *active, *next;
-
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq)
 				  &i915_request_timeline(rq)->requests));
 	rq->ring->head = rq->postfix;
 
-	/*
-	 * Walk through the active list, calling retire on each. This allows
-	 * objects to track their GPU activity and mark themselves as idle
-	 * when their *last* active request is completed (updating state
-	 * tracking lists for eviction, active references for GEM, etc).
-	 *
-	 * As the ->retire() may free the node, we decouple it first and
-	 * pass along the auxiliary information (to avoid dereferencing
-	 * the node after the callback).
-	 */
-	list_for_each_entry_safe(active, next, &rq->active_list, link) {
-		/*
-		 * In microbenchmarks or focusing upon time inside the kernel,
-		 * we may spend an inordinate amount of time simply handling
-		 * the retirement of requests and processing their callbacks.
-		 * Of which, this loop itself is particularly hot due to the
-		 * cache misses when jumping around the list of
-		 * i915_active_request.  So we try to keep this loop as
-		 * streamlined as possible and also prefetch the next
-		 * i915_active_request to try and hide the likely cache miss.
-		 */
-		prefetchw(next);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, rq);
-	}
-
 	local_irq_disable();
 
 	/*
@@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->flags = 0;
 	rq->execution_mask = ALL_ENGINES;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
 
 	/*
@@ -743,7 +711,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
-	GEM_BUG_ON(!list_empty(&rq->active_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
@@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request,
-					 lockdep_is_held(&timeline->mutex));
+	prev = to_request(__i915_active_fence_set(&timeline->last_request,
+						  &rq->fence));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, rq);
 
 	return prev;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index ec5bb4c2e5ae..91a885c36c6b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -211,7 +211,6 @@ struct i915_request {
 	 * on the active_list (of their final request).
 	 */
 	struct i915_capture_list *capture_list;
-	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e191247c7c5f..9fdcd4e2c799 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-	i915_active_init(vm->i915, &vma->active,
-			 __i915_vma_active, __i915_vma_retire);
+	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
 	if (ret)
 		return ret;
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
 		return -EBUSY;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index a41785822ed9..2cc71bcf884f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
 		return NULL;
 
 	kref_init(&active->ref);
-	i915_active_init(i915, &active->base, __live_active, __live_retire);
+	i915_active_init(&active->base, __live_active, __live_retire);
 
 	return active;
 }
@@ -146,19 +146,13 @@ static int live_active_wait(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests retire upon waiting */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	i915_active_wait(&active->base);
 	if (!READ_ONCE(active->retired)) {
@@ -168,11 +162,9 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
@@ -182,23 +174,19 @@ static int live_active_retire(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests are indirectly retired */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
@@ -207,10 +195,6 @@ static int live_active_retire(void *arg)
 
 	__live_put(active);
 
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
-- 
2.23.0
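
For orientation, a minimal sketch of how the reworked tracker is meant
to be used; the names come from the patch above, while the surrounding
timeline setup and error handling are elided, so treat it as an
illustration rather than a drop-in:

	/* Publish rq as the most recent user of the tracked resource. */
	static int mark_active(struct i915_active *ref, struct i915_request *rq)
	{
		/*
		 * The caller must already hold the request's timeline
		 * mutex; i915_active_ref() asserts as much via lockdep.
		 */
		return i915_active_add_request(ref, rq);
	}

	/* Lockless read side, e.g. against a timeline's last_request. */
	static void sync_with_last_user(struct i915_active_fence *active)
	{
		struct dma_fence *fence;

		fence = i915_active_fence_get(active); /* RCU-safe acquire */
		if (!fence)
			return; /* tracker is idle */

		dma_fence_wait(fence, false);
		dma_fence_put(fence); /* pairs with i915_active_fence_get() */
	}

Note that i915_active_wait() no longer retires requests on behalf of
the caller: it enables software signaling on each tracked fence and
then sleeps via wait_var_event_interruptible() until the final
__active_retire() calls wake_up_var().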


* [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (6 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
                   ` (26 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Now that we no longer need to guarantee that the active callback runs
under the struct_mutex, we can lift it out of i915_gem_park() and into
the engine parking itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c    | 19 -------------------
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 15 +++++++++++++++
 drivers/gpu/drm/i915/i915_active.c        |  1 +
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 0a4115c6c275..5180b2ee1cb7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -11,29 +11,10 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 
-static void call_idle_barriers(struct intel_engine_cs *engine)
-{
-	struct llist_node *node, *next;
-
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct dma_fence_cb *cb =
-			container_of((struct list_head *)node,
-				     typeof(*cb), node);
-
-		cb->func(NULL, cb);
-	}
-}
-
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
-		call_idle_barriers(engine); /* cleanup after wedging */
-
 	i915_vma_parked(i915);
 
 	i915_globals_park();
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 66fc49b76ea8..8e5e513eddc9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -124,6 +124,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	return result;
 }
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct dma_fence_cb *cb =
+			container_of((struct list_head *)node,
+				     typeof(*cb), node);
+
+		cb->func(NULL, cb);
+	}
+}
+
 static int __engine_park(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
@@ -143,6 +156,8 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	GEM_TRACE("%s\n", engine->name);
 
+	call_idle_barriers(engine); /* cleanup after wedging */
+
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 023652ded4be..aa37c07004b9 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -693,6 +693,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		rb_link_node(&node->node, parent, p);
 		rb_insert_color(&node->node, &ref->tree);
 
+		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
 		intel_engine_pm_put(engine);
 	}
-- 
2.23.0
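
The container_of() in call_idle_barriers() works because each barrier
entry was queued by casting the list_head embedded in a dma_fence_cb
to an llist_node (see barrier_to_ll() in the previous patch), and
llist only ever touches the leading next pointer of that storage. A
standalone sketch of the layout assumption:

	#include <linux/dma-fence.h>
	#include <linux/kernel.h>
	#include <linux/list.h>
	#include <linux/llist.h>

	/*
	 * struct list_head begins with 'next', aliasing llist_node, so a
	 * node popped off the llist maps straight back to the dma_fence_cb
	 * in which that list_head is embedded.
	 */
	static struct dma_fence_cb *cb_from_llist(struct llist_node *node)
	{
		return container_of((struct list_head *)node,
				    struct dma_fence_cb, node);
	}

Invoking cb->func(NULL, cb) then runs each retirement callback with a
NULL fence, which the i915_active callbacks tolerate as they only use
the callback pointer for their i915_active_fence_cb() bookkeeping.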


* [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests()
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (7 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 11/30] drm/i915: Remove the GEM idle worker Chris Wilson
                   ` (25 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need to hold struct_mutex while retiring requests, so drop
it from i915_retire_requests() and i915_gem_wait_for_idle(), finally
removing I915_WAIT_LOCKED for good.
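
i915_gem_wait_for_idle() thereby becomes a lockless wait-then-retire
loop: retiring one batch of requests can queue more work (e.g. idle
barriers), so we repeat until retiring reports no progress. Roughly, as
a self-contained user-space sketch (wait_for_timelines() and
retire_requests() below are toy stand-ins, and the cond_resched() plus
signal check of the real loop is reduced to a comment):

#include <stdbool.h>
#include <stdio.h>

static int in_flight = 2;           /* requests currently queued */
static bool barrier_pending = true; /* retiring will queue one more */

static int wait_for_timelines(void)
{
        printf("waiting on %d request(s)\n", in_flight);
        in_flight = 0; /* toy model: waiting drains whatever is queued */
        return 0;
}

/* returns true while retiring makes progress (and may queue more work) */
static bool retire_requests(void)
{
        if (barrier_pending) {
                barrier_pending = false;
                in_flight = 1; /* e.g. an idle barrier request */
                return true;
        }
        return false;
}

static int wait_for_idle(void)
{
        do {
                int err = wait_for_timelines();
                if (err)
                        return err;
                /* real loop: cond_resched() and bail out on a signal */
        } while (retire_requests());

        return 0;
}

int main(void)
{
        return wait_for_idle();
}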

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   7 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  20 +--
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  45 ++----
 .../i915/gem/selftests/i915_gem_coherency.c   |  40 +++--
 .../drm/i915/gem/selftests/i915_gem_context.c |  18 +--
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   6 +-
 .../i915/gem/selftests/i915_gem_object_blt.c  |   4 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  28 +---
 drivers/gpu/drm/i915/gt/selftest_context.c    |   4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  89 ++---------
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  23 ++-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  91 +++++------
 .../gpu/drm/i915/gt/selftest_workarounds.c    |   6 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  42 ++---
 drivers/gpu/drm/i915/i915_gem.c               |  19 +--
 drivers/gpu/drm/i915/i915_request.h           |   7 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c | 151 +++++-------------
 .../gpu/drm/i915/selftests/i915_selftest.c    |   8 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   4 -
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  30 ++--
 .../gpu/drm/i915/selftests/igt_flush_test.h   |   2 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |   9 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   4 -
 26 files changed, 213 insertions(+), 460 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index c1fca5728e6e..81366aa4812b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
 static void clear_pages_worker(struct work_struct *work)
 {
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
-	struct drm_i915_private *i915 = w->ce->engine->i915;
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
 	struct i915_request *rq;
@@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	/* XXX: we need to kill this */
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (unlikely(err))
-		goto out_unlock;
+		goto out_signal;
 
 	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
 	if (IS_ERR(batch)) {
@@ -229,8 +226,6 @@ static void clear_pages_worker(struct work_struct *work)
 	intel_emit_vma_release(w->ce, batch);
 out_unpin:
 	i915_vma_unpin(vma);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 out_signal:
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9d85aab68d34..0ab416887fc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1159,8 +1159,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 }
 
 static int
-__intel_context_reconfigure_sseu(struct intel_context *ce,
-				 struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
 	int ret;
 
@@ -1183,23 +1182,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
 	return ret;
 }
 
-static int
-intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
-{
-	struct drm_i915_private *i915 = ce->engine->i915;
-	int ret;
-
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
-}
-
 static int
 user_to_context_sseu(struct drm_i915_private *i915,
 		     const struct drm_i915_gem_context_param_sseu *user,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 5180b2ee1cb7..2ddc3aeaac9d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -48,11 +48,7 @@ static void retire_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.retire_work.work);
 
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&i915->drm.struct_mutex)) {
-		i915_retire_requests(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
+	i915_retire_requests(i915);
 
 	queue_delayed_work(i915->wq,
 			   &i915->gem.retire_work,
@@ -86,26 +82,23 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_FOR_IDLE_BOOST,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-			result = false;
+	if (i915_gem_wait_for_idle(gt->i915,
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/* XXX hide warning from gem_eio */
+		if (i915_modparams.reset) {
+			dev_err(gt->i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
 		}
-	} while (i915_retire_requests(gt->i915) && result);
+
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+		result = false;
+	}
 
 	if (intel_gt_pm_wait_for_idle(gt))
 		result = false;
@@ -145,8 +138,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 
 	user_forcewake(&i915->gt, true);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/*
 	 * We have to flush all the executing contexts to main memory so
 	 * that they can saved in the hibernation image. To ensure the last
@@ -158,8 +149,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	switch_to_kernel_context_sync(&i915->gt);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 0ff7a89aadca..549810f70aeb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
@@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
 	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
@@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	iowrite32(v, &map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
 static int gtt_get(struct drm_i915_gem_object *obj,
@@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
 	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	*v = ioread32(&map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
 static int wc_set(struct drm_i915_gem_object *obj,
@@ -280,7 +293,6 @@ static int igt_gem_coherency(void *arg)
 	struct drm_i915_private *i915 = arg;
 	const struct igt_coherency_mode *read, *write, *over;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	u32 *offsets, *values;
 	int err = 0;
@@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg)
 
 	values = offsets + ncachelines;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg)
 					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 					if (IS_ERR(obj)) {
 						err = PTR_ERR(obj);
-						goto unlock;
+						goto free;
 					}
 
 					i915_random_reorder(offsets, ncachelines, &prng);
@@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg)
 			}
 		}
 	}
-unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+free:
 	kfree(offsets);
 	return err;
 
 put_object:
 	i915_gem_object_put(obj);
-	goto unlock;
+	goto free;
 }
 
 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 8eba0d3a31de..f5402aad9b5a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -164,7 +164,6 @@ struct parallel_switch {
 static int __live_parallel_switch1(void *data)
 {
 	struct parallel_switch *arg = data;
-	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
 	IGT_TIMEOUT(end_time);
 	unsigned long count;
 
@@ -176,16 +175,12 @@ static int __live_parallel_switch1(void *data)
 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
 			i915_request_put(rq);
 
-			mutex_lock(&i915->drm.struct_mutex);
 			rq = i915_request_create(arg->ce[n]);
-			if (IS_ERR(rq)) {
-				mutex_unlock(&i915->drm.struct_mutex);
+			if (IS_ERR(rq))
 				return PTR_ERR(rq);
-			}
 
 			i915_request_get(rq);
 			i915_request_add(rq);
-			mutex_unlock(&i915->drm.struct_mutex);
 		}
 
 		err = 0;
@@ -205,7 +200,6 @@ static int __live_parallel_switch1(void *data)
 static int __live_parallel_switchN(void *data)
 {
 	struct parallel_switch *arg = data;
-	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
 	IGT_TIMEOUT(end_time);
 	unsigned long count;
 	int n;
@@ -215,15 +209,11 @@ static int __live_parallel_switchN(void *data)
 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
 			struct i915_request *rq;
 
-			mutex_lock(&i915->drm.struct_mutex);
 			rq = i915_request_create(arg->ce[n]);
-			if (IS_ERR(rq)) {
-				mutex_unlock(&i915->drm.struct_mutex);
+			if (IS_ERR(rq))
 				return PTR_ERR(rq);
-			}
 
 			i915_request_add(rq);
-			mutex_unlock(&i915->drm.struct_mutex);
 		}
 
 		count++;
@@ -1173,7 +1163,7 @@ __sseu_test(const char *name,
 	if (ret)
 		return ret;
 
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
@@ -1277,7 +1267,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 		goto out_fail;
 
 out_fail:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		ret = -EIO;
 
 	intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 36aca1c172e7..856b8e467ee8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -581,12 +581,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
+	igt_flush_test(i915);
 	intel_gt_pm_put(&i915->gt);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_driver_register__shrinker(i915);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index c21d747e7d05..9ec55b3a3815 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg)
 		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			obj->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_fill_blt(obj, ce, val);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
@@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg)
 		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			dst->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_copy_blt(src, dst, ce);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 29fa1dabbc2e..d4cefdd38431 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -196,26 +196,14 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	mutex_lock(&gt->i915->drm.struct_mutex); /* XXX */
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-		}
-	} while (i915_retire_requests(gt->i915));
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+	if (i915_gem_wait_for_idle(gt->i915, 0,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+	}
 
 	intel_gt_pm_wait_for_idle(gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index e6bcbe7ab5e1..86cffbb0a9cb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -318,7 +318,7 @@ static int live_active_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -431,7 +431,7 @@ static int live_remote_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index d3bee9f88008..ffbb3d23b887 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -58,7 +58,9 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
 	memset(h, 0, sizeof(*h));
 	h->gt = gt;
 
+	mutex_lock(&gt->i915->drm.struct_mutex);
 	h->ctx = kernel_context(gt->i915);
+	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(h->ctx))
 		return PTR_ERR(h->ctx);
 
@@ -285,7 +287,7 @@ static void hang_fini(struct hang *h)
 
 	kernel_context_close(h->ctx);
 
-	igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+	igt_flush_test(h->gt->i915);
 }
 
 static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -309,10 +311,9 @@ static int igt_hang_sanitycheck(void *arg)
 
 	/* Basic check that we can execute our hanging batch */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	for_each_engine(engine, gt->i915, id) {
 		struct intel_wedge_me w;
@@ -355,8 +356,6 @@ static int igt_hang_sanitycheck(void *arg)
 
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -395,8 +394,6 @@ static int igt_reset_nop(void *arg)
 	reset_count = i915_reset_count(global);
 	count = 0;
 	do {
-		mutex_lock(&gt->i915->drm.struct_mutex);
-
 		for_each_engine(engine, gt->i915, id) {
 			int i;
 
@@ -417,7 +414,6 @@ static int igt_reset_nop(void *arg)
 		intel_gt_reset(gt, ALL_ENGINES, NULL);
 		igt_global_reset_unlock(gt);
 
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (intel_gt_is_wedged(gt)) {
 			err = -EIO;
 			break;
@@ -429,16 +425,13 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	} while (time_before(jiffies, end_time));
 	pr_info("%s: %d resets\n", __func__, count);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -494,7 +487,6 @@ static int igt_reset_nop_engine(void *arg)
 				break;
 			}
 
-			mutex_lock(&gt->i915->drm.struct_mutex);
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
@@ -507,7 +499,6 @@ static int igt_reset_nop_engine(void *arg)
 				i915_request_add(rq);
 			}
 			err = intel_engine_reset(engine, NULL);
-			mutex_unlock(&gt->i915->drm.struct_mutex);
 			if (err) {
 				pr_err("i915_reset_engine failed\n");
 				break;
@@ -533,15 +524,12 @@ static int igt_reset_nop_engine(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -563,9 +551,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		return 0;
 
 	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 	}
@@ -593,17 +579,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 			if (active) {
 				struct i915_request *rq;
 
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -647,7 +630,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -655,11 +638,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (active)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -741,10 +721,8 @@ static int active_engine(void *data)
 		struct i915_request *old = rq[idx];
 		struct i915_request *new;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		new = igt_request_alloc(ctx[idx], engine);
 		if (IS_ERR(new)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
 			break;
 		}
@@ -755,7 +733,6 @@ static int active_engine(void *data)
 
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		err = active_request_put(old);
 		if (err)
@@ -795,9 +772,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		return 0;
 
 	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -855,17 +830,14 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			struct i915_request *rq = NULL;
 
 			if (flags & TEST_ACTIVE) {
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -977,9 +949,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		if (err)
 			break;
 
-		mutex_lock(&gt->i915->drm.struct_mutex);
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -987,11 +957,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (flags & TEST_ACTIVE)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -1061,7 +1028,6 @@ static int igt_reset_wait(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1109,7 +1075,6 @@ static int igt_reset_wait(void *arg)
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1189,10 +1154,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 
 	/* Check that we can recover an unbind stuck on a hanging request */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
 	if (IS_ERR(obj)) {
@@ -1255,8 +1219,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 	if (err)
 		goto out_rq;
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (!wait_until_running(&h, rq)) {
 		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 
@@ -1305,16 +1267,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 		put_task_struct(tsk);
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 out_rq:
 	i915_request_put(rq);
 out_obj:
 	i915_gem_object_put(obj);
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (intel_gt_is_wedged(gt))
 		return -EIO;
 
@@ -1396,7 +1354,6 @@ static int igt_reset_queue(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1511,7 +1468,7 @@ static int igt_reset_queue(void *arg)
 
 		i915_request_put(prev);
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -1519,7 +1476,6 @@ static int igt_reset_queue(void *arg)
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1546,11 +1502,9 @@ static int igt_handle_error(void *arg)
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	err = hang_init(&h, gt);
 	if (err)
-		goto err_unlock;
+		return err;
 
 	rq = hang_create_request(&h, engine);
 	if (IS_ERR(rq)) {
@@ -1574,8 +1528,6 @@ static int igt_handle_error(void *arg)
 		goto err_request;
 	}
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	/* Temporarily disable error capture */
 	error = xchg(&global->first_error, (void *)-1);
 
@@ -1583,8 +1535,6 @@ static int igt_handle_error(void *arg)
 
 	xchg(&global->first_error, error);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	if (rq->fence.error != -EIO) {
 		pr_err("Guilty request not identified!\n");
 		err = -EINVAL;
@@ -1595,8 +1545,6 @@ static int igt_handle_error(void *arg)
 	i915_request_put(rq);
 err_fini:
 	hang_fini(&h);
-err_unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1689,7 +1637,6 @@ static int igt_reset_engines_atomic(void *arg)
 		return 0;
 
 	igt_global_reset_lock(gt);
-	mutex_lock(&gt->i915->drm.struct_mutex);
 
 	/* Flush any requests before we get started and check basics */
 	if (!igt_force_reset(gt))
@@ -1709,9 +1656,7 @@ static int igt_reset_engines_atomic(void *arg)
 out:
 	/* As we poke around the guts, do a full reset before continuing. */
 	igt_force_reset(gt);
-
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	return err;
@@ -1751,10 +1696,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 01d5a26b9aa7..7d3d9c53c416 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -61,7 +61,7 @@ static int live_sanitycheck(void *arg)
 		}
 
 		igt_spinner_end(&spin);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx;
 		}
@@ -388,8 +388,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 	if (err)
 		goto out;
 
-	if (i915_request_wait(head,
-			      I915_WAIT_LOCKED,
+	if (i915_request_wait(head, 0,
 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 		       count, n);
@@ -461,7 +460,7 @@ static int live_timeslice_preempt(void *arg)
 			if (err)
 				goto err_pin;
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			if (igt_flush_test(i915)) {
 				err = -EIO;
 				goto err_pin;
 			}
@@ -1014,7 +1013,7 @@ static int live_nopreempt(void *arg)
 			goto err_wedged;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1079,7 +1078,7 @@ static int live_suppress_self_preempt(void *arg)
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 
 		intel_engine_pm_get(engine);
@@ -1140,7 +1139,7 @@ static int live_suppress_self_preempt(void *arg)
 		}
 
 		intel_engine_pm_put(engine);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1301,7 +1300,7 @@ static int live_suppress_wait_preempt(void *arg)
 			for (i = 0; i < ARRAY_SIZE(client); i++)
 				igt_spinner_end(&client[i].spin);
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			if (igt_flush_test(i915))
 				goto err_wedged;
 
 			if (engine->execlists.preempt_hang.count) {
@@ -1580,7 +1579,7 @@ static int live_preempt_hang(void *arg)
 
 		igt_spinner_end(&spin_hi);
 		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1977,7 +1976,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		prime, div64_u64(ktime_to_ns(times[1]), prime));
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
@@ -2122,7 +2121,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 		goto out;
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < nsibling; n++)
@@ -2300,7 +2299,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 out:
 	for (n = 0; !IS_ERR(rq[n]); n++)
 		i915_request_put(rq[n]);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..16abfabf08c7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,7 +6,7 @@
 
 #include <linux/prime_numbers.h>
 
-#include "gem/i915_gem_pm.h"
+#include "intel_engine_pm.h"
 #include "intel_gt.h"
 
 #include "../selftests/i915_random.h"
@@ -136,7 +136,6 @@ static int mock_hwsp_freelist(void *arg)
 		goto err_put;
 	}
 
-	mutex_lock(&state.i915->drm.struct_mutex);
 	for (p = phases; p->name; p++) {
 		pr_debug("%s(%s)\n", __func__, p->name);
 		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
@@ -149,7 +148,6 @@ static int mock_hwsp_freelist(void *arg)
 out:
 	for (na = 0; na < state.max; na++)
 		__mock_hwsp_record(&state, na, NULL);
-	mutex_unlock(&state.i915->drm.struct_mutex);
 	kfree(state.history);
 err_put:
 	drm_dev_put(&state.i915->drm);
@@ -449,8 +447,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
 	err = intel_timeline_pin(tl);
 	if (err) {
 		rq = ERR_PTR(err);
@@ -461,10 +457,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	if (IS_ERR(rq))
 		goto out_unpin;
 
+	i915_request_get(rq);
+
 	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
 	i915_request_add(rq);
-	if (err)
+	if (err) {
+		i915_request_put(rq);
 		rq = ERR_PTR(err);
+	}
 
 out_unpin:
 	intel_timeline_unpin(tl);
@@ -500,7 +500,6 @@ static int live_hwsp_engine(void *arg)
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -515,14 +514,13 @@ static int live_hwsp_engine(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for_each_engine(engine, i915, id) {
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		for (n = 0; n < NUM_TIMELINES; n++) {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
@@ -530,22 +528,26 @@ static int live_hwsp_engine(void *arg)
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
+
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
 	}
 
-out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -559,11 +561,7 @@ static int live_hwsp_engine(void *arg)
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -575,7 +573,6 @@ static int live_hwsp_alternate(void *arg)
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -591,9 +588,6 @@ static int live_hwsp_alternate(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for (n = 0; n < NUM_TIMELINES; n++) {
 		for_each_engine(engine, i915, id) {
@@ -605,11 +599,14 @@ static int live_hwsp_alternate(void *arg)
 
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
+				intel_engine_pm_put(engine);
 				err = PTR_ERR(tl);
 				goto out;
 			}
 
+			intel_engine_pm_get(engine);
 			rq = tl_write(tl, engine, count);
+			intel_engine_pm_put(engine);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
@@ -617,11 +614,12 @@ static int live_hwsp_alternate(void *arg)
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -635,11 +633,7 @@ static int live_hwsp_alternate(void *arg)
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -650,7 +644,6 @@ static int live_hwsp_wrap(void *arg)
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -658,14 +651,10 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	tl = intel_timeline_create(&i915->gt, NULL);
-	if (IS_ERR(tl)) {
-		err = PTR_ERR(tl);
-		goto out_rpm;
-	}
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 		goto out_free;
 
@@ -681,7 +670,9 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
 		rq = i915_request_create(engine->kernel_context);
+		intel_engine_pm_put(engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
@@ -747,16 +738,12 @@ static int live_hwsp_wrap(void *arg)
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	intel_timeline_unpin(tl);
 out_free:
 	intel_timeline_put(tl);
-out_rpm:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -765,7 +752,6 @@ static int live_hwsp_recycle(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count;
 	int err = 0;
 
@@ -775,9 +761,6 @@ static int live_hwsp_recycle(void *arg)
 	 * want to confuse ourselves or the GPU.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for_each_engine(engine, i915, id) {
 		IGT_TIMEOUT(end_time);
@@ -785,6 +768,8 @@ static int live_hwsp_recycle(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		do {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
@@ -792,21 +777,22 @@ static int live_hwsp_recycle(void *arg)
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Wait for timeline writes timed out!\n");
+				i915_request_put(rq);
 				intel_timeline_put(tl);
 				err = -EIO;
-				goto out;
+				break;
 			}
 
 			if (*tl->hwsp_seqno != count) {
@@ -815,17 +801,18 @@ static int live_hwsp_recycle(void *arg)
 				err = -EINVAL;
 			}
 
+			i915_request_put(rq);
 			intel_timeline_put(tl);
 			count++;
 
 			if (err)
-				goto out;
+				break;
 		} while (!__igt_timeout(end_time, NULL));
-	}
 
-out:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d40ce0709bff..4ee2e2babd0d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -676,7 +676,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 			break;
 	}
 
-	if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(ctx->i915))
 		err = -EIO;
 out_batch:
 	i915_vma_unpin_and_release(&batch, 0);
@@ -1090,7 +1090,7 @@ static int live_isolated_whitelist(void *arg)
 		kernel_context_close(client[i].ctx);
 	}
 
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	return err;
@@ -1248,7 +1248,7 @@ live_engine_reset_workarounds(void *arg)
 	igt_global_reset_unlock(&i915->gt);
 	kernel_context_close(ctx);
 
-	igt_flush_test(i915, I915_WAIT_LOCKED);
+	igt_flush_test(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fec9fb7cc384..385289895107 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3621,6 +3621,7 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
@@ -3630,40 +3631,21 @@ i915_drop_caches_set(void *data, u64 val)
 		     I915_IDLE_ENGINES_TIMEOUT))
 		intel_gt_set_wedged(&i915->gt);
 
-	/* No need to check and wait for gpu resets, only libdrm auto-restarts
-	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) {
-		int ret;
+	if (val & DROP_RETIRE)
+		i915_retire_requests(i915);
 
-		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (val & (DROP_IDLE | DROP_ACTIVE)) {
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
+	}
 
-		/*
-		 * To finish the flush of the idle_worker, we must complete
-		 * the switch-to-kernel-context, which requires a double
-		 * pass through wait_for_idle: first queues the switch,
-		 * second waits for the switch.
-		 */
-		if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE))
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (val & DROP_RETIRE)
-			i915_retire_requests(i915);
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = intel_gt_pm_wait_for_idle(&i915->gt);
+	if (val & DROP_IDLE) {
+		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		if (ret)
+			return ret;
 	}
 
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 64890627d638..e4c553d9aa07 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -945,19 +945,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
-		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
-
-	timeout = wait_for_timelines(gt, flags, timeout);
-	if (timeout < 0)
-		return timeout;
+	do {
+		timeout = wait_for_timelines(gt, flags, timeout);
+		if (timeout < 0)
+			return timeout;
 
-	if (flags & I915_WAIT_LOCKED) {
-		lockdep_assert_held(&i915->drm.struct_mutex);
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
 
-		i915_retire_requests(i915);
-	}
+	} while (i915_retire_requests(i915));
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 91a885c36c6b..621fb33cda30 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -308,10 +308,9 @@ long i915_request_wait(struct i915_request *rq,
 		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
-#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
-#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
+#define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 2cc71bcf884f..268192b5613b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -162,10 +162,8 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -183,10 +181,8 @@ static int live_active_retire(void *arg)
 		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 75a4695b82bb..52d2df843148 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -523,7 +523,7 @@ static int igt_evict_contexts(void *arg)
 
 	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	while (reserved) {
 		struct reserved *next = reserved->next;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 373fbd58a17a..9939dd40c6a8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1703,12 +1703,8 @@ int i915_gem_gtt_mock_selftests(void)
 
 	err = i915_subtests(tests, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
@@ -2002,7 +1998,7 @@ static int igt_cs_tlb(void *arg)
 		}
 	}
 end:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	i915_gem_context_unlock_engines(ctx);
 	i915_gem_object_unpin_map(out);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index eb175da48547..d7d68c6a6bd5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -41,21 +41,16 @@ static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_request *request;
-	int err = -ENOMEM;
 
 	/* Basic preliminary test to create a request and let it loose! */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
 	if (!request)
-		goto out_unlock;
+		return -ENOMEM;
 
 	i915_request_add(request);
 
-	err = 0;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int igt_wait_request(void *arg)
@@ -67,12 +62,10 @@ static int igt_wait_request(void *arg)
 
 	/* Submit a request, then wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_unlock;
-	}
+	if (!request)
+		return -ENOMEM;
+
 	i915_request_get(request);
 
 	if (i915_request_wait(request, 0, 0) != -ETIME) {
@@ -125,9 +118,7 @@ static int igt_wait_request(void *arg)
 	err = 0;
 out_request:
 	i915_request_put(request);
-out_unlock:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -140,52 +131,45 @@ static int igt_fence_wait(void *arg)
 
 	/* Submit a request, treat it as a fence and wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_locked;
-	}
+	if (!request)
+		return -ENOMEM;
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 		pr_err("fence wait success before submit (expected timeout)!\n");
-		goto out_locked;
+		goto out;
 	}
 
 	i915_request_add(request);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence signaled immediately!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
 		pr_err("fence wait success after submit (expected timeout)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (!dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence unsignaled after waiting!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out when complete (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	err = 0;
-out_device:
-	mutex_lock(&i915->drm.struct_mutex);
-out_locked:
+out:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -199,6 +183,8 @@ static int igt_request_rewind(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ctx[0] = mock_context(i915, "A");
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	request = mock_request(ce, 2 * HZ);
@@ -211,7 +197,10 @@ static int igt_request_rewind(void *arg)
 	i915_request_get(request);
 	i915_request_add(request);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	ctx[1] = mock_context(i915, "B");
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	vip = mock_request(ce, 0);
@@ -233,7 +222,6 @@ static int igt_request_rewind(void *arg)
 	request->engine->submit_request(request);
 	rcu_read_unlock();
 
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 		pr_err("timed out waiting for high priority request\n");
@@ -248,14 +236,12 @@ static int igt_request_rewind(void *arg)
 	err = 0;
 err:
 	i915_request_put(vip);
-	mutex_lock(&i915->drm.struct_mutex);
 err_context_1:
 	mock_context_close(ctx[1]);
 	i915_request_put(request);
 err_context_0:
 	mock_context_close(ctx[0]);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -282,7 +268,6 @@ __live_request_alloc(struct intel_context *ce)
 static int __igt_breadcrumbs_smoketest(void *arg)
 {
 	struct smoketest *t = arg;
-	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 	const unsigned int total = 4 * t->ncontexts + 1;
 	unsigned int num_waits = 0, num_fences = 0;
@@ -337,14 +322,11 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			struct i915_request *rq;
 			struct intel_context *ce;
 
-			mutex_lock(BKL);
-
 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 			GEM_BUG_ON(IS_ERR(ce));
 			rq = t->request_alloc(ce);
 			intel_context_put(ce);
 			if (IS_ERR(rq)) {
-				mutex_unlock(BKL);
 				err = PTR_ERR(rq);
 				count = n;
 				break;
@@ -357,8 +339,6 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			requests[n] = i915_request_get(rq);
 			i915_request_add(rq);
 
-			mutex_unlock(BKL);
-
 			if (err >= 0)
 				err = i915_sw_fence_await_dma_fence(wait,
 								    &rq->fence,
@@ -457,15 +437,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		goto out_threads;
 	}
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 	for (n = 0; n < t.ncontexts; n++) {
+		mutex_lock(&t.engine->i915->drm.struct_mutex);
 		t.contexts[n] = mock_context(t.engine->i915, "mock");
+		mutex_unlock(&t.engine->i915->drm.struct_mutex);
 		if (!t.contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
 		}
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 
 	for (n = 0; n < ncpus; n++) {
 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
@@ -495,18 +475,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		atomic_long_read(&t.num_fences),
 		ncpus);
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 out_contexts:
 	for (n = 0; n < t.ncontexts; n++) {
 		if (!t.contexts[n])
 			break;
 		mock_context_close(t.contexts[n]);
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 	kfree(t.contexts);
 out_threads:
 	kfree(threads);
-
 	return ret;
 }
 
@@ -539,7 +516,6 @@ static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	unsigned int id;
 	int err = -ENODEV;
@@ -549,28 +525,25 @@ static int live_nop_request(void *arg)
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	for_each_engine(engine, i915, id) {
-		struct i915_request *request = NULL;
 		unsigned long n, prime;
 		IGT_TIMEOUT(end_time);
 		ktime_t times[2] = {};
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		for_each_prime_number_from(prime, 1, 8192) {
+			struct i915_request *request = NULL;
+
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = i915_request_create(engine->kernel_context);
-				if (IS_ERR(request)) {
-					err = PTR_ERR(request);
-					goto out_unlock;
-				}
+				if (IS_ERR(request))
+					return PTR_ERR(request);
 
 				/* This space is left intentionally blank.
 				 *
@@ -585,9 +558,11 @@ static int live_nop_request(void *arg)
 				 * for latency.
 				 */
 
+				i915_request_get(request);
 				i915_request_add(request);
 			}
 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
+			i915_request_put(request);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 1)
@@ -599,7 +574,7 @@ static int live_nop_request(void *arg)
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -607,9 +582,6 @@ static int live_nop_request(void *arg)
 			prime, div64_u64(ktime_to_ns(times[1]), prime));
 	}
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -679,6 +651,7 @@ empty_request(struct intel_engine_cs *engine,
 	if (err)
 		goto out_request;
 
+	i915_request_get(request);
 out_request:
 	i915_request_add(request);
 	return err ? ERR_PTR(err) : request;
@@ -688,7 +661,6 @@ static int live_empty_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
 	unsigned int id;
@@ -699,14 +671,9 @@ static int live_empty_request(void *arg)
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	batch = empty_batch(i915);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_unlock;
-	}
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
 
 	for_each_engine(engine, i915, id) {
 		IGT_TIMEOUT(end_time);
@@ -730,6 +697,7 @@ static int live_empty_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = empty_request(engine, batch);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
@@ -745,6 +713,7 @@ static int live_empty_request(void *arg)
 			if (__igt_timeout(end_time, NULL))
 				break;
 		}
+		i915_request_put(request);
 
 		err = igt_live_test_end(&t);
 		if (err)
@@ -759,9 +728,6 @@ static int live_empty_request(void *arg)
 out_batch:
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -841,7 +807,6 @@ static int live_all_engines(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	struct i915_request *request[I915_NUM_ENGINES];
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
 	unsigned int id;
@@ -852,18 +817,15 @@ static int live_all_engines(void *arg)
 	 * block doing so, and that they don't complete too soon.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		return err;
 
 	batch = recursive_batch(i915);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
-		goto out_unlock;
+		return err;
 	}
 
 	for_each_engine(engine, i915, id) {
@@ -933,9 +895,6 @@ static int live_all_engines(void *arg)
 			i915_request_put(request[id]);
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -945,7 +904,6 @@ static int live_sequential_engines(void *arg)
 	struct i915_request *request[I915_NUM_ENGINES] = {};
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	unsigned int id;
 	int err;
@@ -956,12 +914,9 @@ static int live_sequential_engines(void *arg)
 	 * they are running on independent engines.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		return err;
 
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *batch;
@@ -971,7 +926,7 @@ static int live_sequential_engines(void *arg)
 			err = PTR_ERR(batch);
 			pr_err("%s: Unable to create batch for %s, err=%d\n",
 			       __func__, engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 
 		request[id] = i915_request_create(engine->kernel_context);
@@ -1063,9 +1018,6 @@ static int live_sequential_engines(void *arg)
 		i915_vma_put(request[id]->batch);
 		i915_request_put(request[id]);
 	}
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1080,16 +1032,12 @@ static int __live_parallel_engine1(void *arg)
 		struct i915_request *rq;
 		int err;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(rq))
 			return PTR_ERR(rq);
-		}
 
 		i915_request_get(rq);
 		i915_request_add(rq);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		err = 0;
 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
@@ -1115,16 +1063,11 @@ static int __live_parallel_engineN(void *arg)
 	do {
 		struct i915_request *rq;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(rq))
 			return PTR_ERR(rq);
-		}
 
 		i915_request_add(rq);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
-
 		count++;
 	} while (!__igt_timeout(end_time, NULL));
 
@@ -1154,9 +1097,7 @@ static int live_parallel_engines(void *arg)
 		struct task_struct *tsk[I915_NUM_ENGINES] = {};
 		struct igt_live_test t;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = igt_live_test_begin(&t, i915, __func__, "");
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 
@@ -1184,10 +1125,8 @@ static int live_parallel_engines(void *arg)
 			put_task_struct(tsk[id]);
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
 	return err;
@@ -1280,9 +1219,10 @@ static int live_breadcrumbs_smoketest(void *arg)
 		goto out_threads;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (n = 0; n < t[0].ncontexts; n++) {
+		mutex_lock(&i915->drm.struct_mutex);
 		t[0].contexts[n] = live_context(i915, file);
+		mutex_unlock(&i915->drm.struct_mutex);
 		if (!t[0].contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
@@ -1299,7 +1239,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 		t[id].max_batch = max_batches(t[0].contexts[0], engine);
 		if (t[id].max_batch < 0) {
 			ret = t[id].max_batch;
-			mutex_unlock(&i915->drm.struct_mutex);
 			goto out_flush;
 		}
 		/* One ring interleaved between requests from all cpus */
@@ -1314,7 +1253,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 					  &t[id], "igt/%d.%d", id, n);
 			if (IS_ERR(tsk)) {
 				ret = PTR_ERR(tsk);
-				mutex_unlock(&i915->drm.struct_mutex);
 				goto out_flush;
 			}
 
@@ -1322,7 +1260,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 			threads[id * ncpus + n] = tsk;
 		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
@@ -1350,10 +1287,8 @@ static int live_breadcrumbs_smoketest(void *arg)
 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ret = igt_live_test_end(&live) ?: ret;
 out_contexts:
-	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(t[0].contexts);
 out_threads:
 	kfree(threads);
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 438ea0eaa416..825a8286cbe8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data)
 {
 	struct drm_i915_private *i915 = data;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(i915);
 
@@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data)
 {
 	struct intel_gt *gt = data;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	if (igt_flush_test(gt->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(gt->i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 0e4f66312b39..1c9db08f7c28 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -833,12 +833,8 @@ int i915_vma_mock_selftests(void)
 
 	err = i915_subtests(tests, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index d3b5eb402d33..2a5fbe46ea9f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -12,31 +12,25 @@
 
 #include "igt_flush_test.h"
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
+int igt_flush_test(struct drm_i915_private *i915)
 {
 	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
-	int repeat = !!(flags & I915_WAIT_LOCKED);
 
 	cond_resched();
 
-	do {
-		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-			pr_err("%pS timed out, cancelling all further testing.\n",
-			       __builtin_return_address(0));
+	i915_retire_requests(i915);
+	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+		pr_err("%pS timed out, cancelling all further testing.\n",
+		       __builtin_return_address(0));
 
-			GEM_TRACE("%pS timed out.\n",
-				  __builtin_return_address(0));
-			GEM_TRACE_DUMP();
+		GEM_TRACE("%pS timed out.\n",
+			  __builtin_return_address(0));
+		GEM_TRACE_DUMP();
 
-			intel_gt_set_wedged(&i915->gt);
-			repeat = 0;
-			ret = -EIO;
-		}
-
-		/* Ensure we also flush after wedging. */
-		if (flags & I915_WAIT_LOCKED)
-			i915_retire_requests(i915);
-	} while (repeat--);
+		intel_gt_set_wedged(&i915->gt);
+		ret = -EIO;
+	}
+	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
index 63e009927c43..7541fa74e641 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
@@ -9,6 +9,6 @@
 
 struct drm_i915_private;
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags);
+int igt_flush_test(struct drm_i915_private *i915);
 
 #endif /* IGT_FLUSH_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 3e902761cd16..04a6f88fdf64 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -19,15 +19,12 @@ int igt_live_test_begin(struct igt_live_test *t,
 	enum intel_engine_id id;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	t->i915 = i915;
 	t->func = func;
 	t->name = name;
 
 	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
+				     I915_WAIT_INTERRUPTIBLE,
 				     MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
@@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		return -EIO;
 
 	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 2448067822af..622bb2127453 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -41,8 +41,6 @@ void mock_device_flush(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
@@ -55,9 +53,7 @@ static void mock_device_release(struct drm_device *dev)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
-- 
2.23.0
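
For orientation, the igt_flush_test() calling convention before and after
this change, condensed from the hunks above (all names come from the
diffs; the snippet itself is illustrative, not part of the patch):

	/* before: callers held struct_mutex and passed wait flags */
	mutex_lock(&i915->drm.struct_mutex);
	err = igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	/* after: lockless; retires, waits up to HZ/5 for idle, and wedges
	 * the GPU on timeout before retiring once more
	 */
	err = igt_flush_test(i915);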


* [PATCH 11/30] drm/i915: Remove the GEM idle worker
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (8 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
                   ` (24 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Nothing inside the idle worker now requires struct_mutex, so we can
remove the indirection of using our own worker.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 28 ++-----------------
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  3 --
 drivers/gpu/drm/i915/i915_debugfs.c           |  5 ----
 drivers/gpu/drm/i915/i915_drv.h               |  9 ------
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  6 ----
 5 files changed, 2 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 2ddc3aeaac9d..26f325bbfe4d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -13,36 +13,13 @@
 
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	cancel_delayed_work(&i915->gem.retire_work);
 
 	i915_vma_parked(i915);
 
 	i915_globals_park();
 }
 
-static void idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.idle_work);
-	bool park;
-
-	cancel_delayed_work_sync(&i915->gem.retire_work);
-	mutex_lock(&i915->drm.struct_mutex);
-
-	intel_wakeref_lock(&i915->gt.wakeref);
-	park = (!intel_wakeref_is_active(&i915->gt.wakeref) &&
-		!work_pending(work));
-	intel_wakeref_unlock(&i915->gt.wakeref);
-	if (park)
-		i915_gem_park(i915);
-	else
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static void retire_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
@@ -71,7 +48,7 @@ static int pm_notifier(struct notifier_block *nb,
 		break;
 
 	case INTEL_GT_PARK:
-		queue_work(i915->wq, &i915->gem.idle_work);
+		i915_gem_park(i915);
 		break;
 	}
 
@@ -264,7 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 void i915_gem_init__pm(struct drm_i915_private *i915)
 {
-	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
 	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
 
 	i915->gem.pm_notifier.notifier_call = pm_notifier;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 856b8e467ee8..4ba6ed5c8313 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -572,11 +572,8 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
-
 	intel_gt_pm_get(&i915->gt);
-
 	cancel_delayed_work_sync(&i915->gem.retire_work);
-	flush_work(&i915->gem.idle_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 385289895107..7c4bba21adcd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3662,11 +3662,6 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_shrink_all(i915);
 	fs_reclaim_release(GFP_KERNEL);
 
-	if (val & DROP_IDLE) {
-		flush_delayed_work(&i915->gem.retire_work);
-		flush_work(&i915->gem.idle_work);
-	}
-
 	if (val & DROP_FREED)
 		i915_gem_drain_freed_objects(i915);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 337d8306416a..ad31852e4309 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1719,15 +1719,6 @@ struct drm_i915_private {
 		 * fires, go retire requests.
 		 */
 		struct delayed_work retire_work;
-
-		/**
-		 * When we detect an idle GPU, we want to turn on
-		 * powersaving features. So once we see that there
-		 * are no more requests outstanding and no more
-		 * arrive within a small period of time, we fire
-		 * off the idle_work.
-		 */
-		struct work_struct idle_work;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 622bb2127453..a8be5da2b3cf 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -55,7 +55,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	mock_device_flush(i915);
 
-	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -103,10 +102,6 @@ static void mock_retire_work_handler(struct work_struct *work)
 {
 }
 
-static void mock_idle_work_handler(struct work_struct *work)
-{
-}
-
 static int pm_domain_resume(struct device *dev)
 {
 	return pm_generic_runtime_resume(dev);
@@ -187,7 +182,6 @@ struct drm_i915_private *mock_gem_device(void)
 	mock_init_contexts(i915);
 
 	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
-	INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler);
 
 	intel_timelines_init(i915);
 
-- 
2.23.0
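
For orientation, the park/unpark flow once the idle worker is gone,
condensed from the diff (the container_of() and NOTIFY_OK details are
assumed from context rather than shown in the hunks):

	static int pm_notifier(struct notifier_block *nb,
			       unsigned long action, void *data)
	{
		struct drm_i915_private *i915 =
			container_of(nb, typeof(*i915), gem.pm_notifier);

		switch (action) {
		case INTEL_GT_UNPARK:
			i915_globals_unpark();
			/* the retire timer is still re-armed on unpark */
			queue_delayed_work(i915->wq, &i915->gem.retire_work,
					   round_jiffies_up_relative(HZ));
			break;

		case INTEL_GT_PARK:
			/* park synchronously: i915_gem_park() now cancels
			 * gem.retire_work itself, no bounce via idle_work
			 */
			i915_gem_park(i915);
			break;
		}

		return NOTIFY_OK;
	}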


* [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (9 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 11/30] drm/i915: Remove the GEM idle worker Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
                   ` (23 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

wait_for_timelines is essentially the same loop as retiring requests
(with an extra timeout), so merge the two into one routine.

v2: i915_retire_requests_timeout and keep VT'd w/a as !interruptible

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  4 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  3 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  4 +-
 drivers/gpu/drm/i915/i915_drv.h               |  3 +-
 drivers/gpu/drm/i915/i915_gem.c               | 67 ++-----------------
 drivers/gpu/drm/i915/i915_gem_evict.c         | 12 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  4 +-
 drivers/gpu/drm/i915/i915_request.c           | 26 ++++++-
 drivers/gpu/drm/i915/i915_request.h           |  7 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  4 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 +-
 14 files changed, 50 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index c19431d609fc..418d0d2b5fa9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -432,9 +432,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 
 	/* Attempt to reap some mmap space from dead objects */
 	do {
-		err = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (err)
 			break;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 26f325bbfe4d..90b211257f2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -59,9 +59,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915,
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f5402aad9b5a..f902aeee1755 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1137,7 +1137,7 @@ __sseu_finish(const char *name,
 
 	if ((flags & TEST_IDLE) && ret == 0) {
 		ret = i915_gem_wait_for_idle(ce->engine->i915,
-					     0, MAX_SCHEDULE_TIMEOUT);
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d4cefdd38431..bdb34f03ec47 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -196,8 +196,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	if (i915_gem_wait_for_idle(gt->i915, 0,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c4bba21adcd..5888a658e2b7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3635,9 +3635,7 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_retire_requests(i915);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ad31852e4309..44f3463ff9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2321,8 +2321,7 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   unsigned int flags, long timeout);
+int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4c553d9aa07..7c82fc39f655 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -883,61 +883,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-static long
-wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
-{
-	struct intel_gt_timelines *timelines = &gt->timelines;
-	struct intel_timeline *tl;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct dma_fence *fence;
-
-		fence = i915_active_fence_get(&tl->last_request);
-		if (!fence)
-			continue;
-
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (!dma_fence_is_i915(fence)) {
-			timeout = dma_fence_wait_timeout(fence,
-							 flags & I915_WAIT_INTERRUPTIBLE,
-							 timeout);
-		} else {
-			struct i915_request *rq = to_request(fence);
-
-			/*
-			 * "Race-to-idle".
-			 *
-			 * Switching to the kernel context is often used as
-			 * a synchronous step prior to idling, e.g. in suspend
-			 * for flushing all current operations to memory before
-			 * sleeping. These we want to complete as quickly as
-			 * possible to avoid prolonged stalls, so allow the gpu
-			 * to boost to maximum clocks.
-			 */
-			if (flags & I915_WAIT_FOR_IDLE_BOOST)
-				gen6_rps_boost(rq);
-
-			timeout = i915_request_wait(rq, flags, timeout);
-		}
-
-		dma_fence_put(fence);
-		if (timeout < 0)
-			return timeout;
-
-		/* restart after reacquiring the lock */
-		spin_lock_irqsave(&timelines->lock, flags);
-		tl = list_entry(&timelines->active_list, typeof(*tl), link);
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	return timeout;
-}
-
-int i915_gem_wait_for_idle(struct drm_i915_private *i915,
-			   unsigned int flags, long timeout)
+int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt *gt = &i915->gt;
 
@@ -945,18 +891,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
-	do {
-		timeout = wait_for_timelines(gt, flags, timeout);
-		if (timeout < 0)
-			return timeout;
-
+	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
 		cond_resched();
 		if (signal_pending(current))
 			return -EINTR;
+	}
 
-	} while (i915_retire_requests(i915));
-
-	return 0;
+	return timeout;
 }
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0552bf93eea3..0a412f6d01d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -46,9 +46,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT);
+	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -126,6 +124,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
+	i915_retire_requests(vm->i915);
+
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -264,13 +264,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 
 	trace_i915_gem_evict_node(vm, target, flags);
 
-	/* Retire before we search the active list. Although we have
+	/*
+	 * Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(vm->i915);
+	i915_retire_requests(vm->i915);
 
 	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7462d87f7a48..082fcf9085a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2528,7 +2528,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
+		/* XXX This does not prevent more requests being submitted! */
+		if (i915_retire_requests_timeout(dev_priv,
+						 -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4ffe62a42186..52f7c4e5b644 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1508,13 +1508,19 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915)
+long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
 	unsigned long flags;
+	bool interruptible;
 	LIST_HEAD(free);
 
+	interruptible = true;
+	if (timeout < 0)
+		timeout = -timeout, interruptible = false;
+
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 		if (!mutex_trylock(&tl->mutex))
@@ -1525,13 +1531,27 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 		tl->active_count++; /* pin the list element */
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 interruptible,
+								 timeout);
+				dma_fence_put(fence);
+			}
+		}
+
 		retire_requests(tl);
 
 		spin_lock_irqsave(&timelines->lock, flags);
 
 		/* Resume iteration after dropping lock */
 		list_safe_reset_next(tl, tn, link);
-		if (!--tl->active_count)
+		if (--tl->active_count)
+			active_count += !!rcu_access_pointer(tl->last_request.fence);
+		else
 			list_del(&tl->link);
 
 		mutex_unlock(&tl->mutex);
@@ -1547,7 +1567,7 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 	list_for_each_entry_safe(tl, tn, &free, link)
 		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&timelines->active_list);
+	return active_count ? timeout : 0;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 621fb33cda30..256b0715180f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -310,7 +310,6 @@ long i915_request_wait(struct i915_request *rq,
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
 #define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
@@ -460,6 +459,10 @@ i915_request_active_timeline(struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915);
+long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
+static inline void i915_retire_requests(struct drm_i915_private *i915)
+{
+	i915_retire_requests_timeout(i915, 0);
+}
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 2a5fbe46ea9f..ed496bd6d84f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -18,8 +18,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
 	cond_resched();
 
-	i915_retire_requests(i915);
-	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -30,7 +29,6 @@ int igt_flush_test(struct drm_i915_private *i915)
 		intel_gt_set_wedged(&i915->gt);
 		ret = -EIO;
 	}
-	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 04a6f88fdf64..eae90f97df6c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -23,9 +23,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE,
-				     MAX_SCHEDULE_TIMEOUT);
+	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index a8be5da2b3cf..3b589bbb2c2d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests(i915));
+	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
-- 
2.23.0
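
The merged helper folds the wait into the retire walk and encodes
interruptibility in the sign of the timeout. A sketch of the resulting
call patterns (values taken from the hunks above; illustrative only):

	/* retire whatever has already completed, without waiting */
	i915_retire_requests(i915); /* == i915_retire_requests_timeout(i915, 0) */

	/* interruptible wait, up to 200ms, for in-flight requests */
	timeout = i915_retire_requests_timeout(i915, HZ / 5);

	/* uninterruptible wait via a negative timeout, as in the VT-d w/a */
	timeout = i915_retire_requests_timeout(i915, -MAX_SCHEDULE_TIMEOUT);

The return value is 0 once no timelines remain active, otherwise the
timeout left over after waiting.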


* [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (10 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:53   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
                   ` (22 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Now that we can retire without taking struct_mutex, we can do so to
handle shrinking the mmap-offset space after an allocation failure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 418d0d2b5fa9..45bbd22c14f1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -431,19 +431,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 		return 0;
 
 	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
+	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		return err;
 
-		i915_gem_drain_freed_objects(i915);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
-
-	} while (flush_delayed_work(&i915->gem.retire_work));
-
-	return err;
+	i915_gem_drain_freed_objects(i915);
+	return drm_gem_create_mmap_offset(&obj->base);
 }
 
 int
-- 
2.23.0
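
At a glance, the wait/flush/retry loop against the background retire
worker collapses into a single synchronous retire pass (condensed from
the diff above; no new code here):

	/* before: repeat until the retire worker freed up mmap space */
	do {
		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
		...
	} while (flush_delayed_work(&i915->gem.retire_work));

	/* after: retire inline, reap the dead objects, then try once */
	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	i915_gem_drain_freed_objects(i915);
	return drm_gem_create_mmap_offset(&obj->base);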


* [PATCH 14/30] drm/i915: Move request runtime management onto gt
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (11 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:58   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT Chris Wilson
                   ` (21 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Requests are run from the gt and are tied into the gt runtime power
management, so pull the runtime request management under gt/.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  28 +---
 .../drm/i915/gem/selftests/i915_gem_context.c |   5 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   5 +-
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 123 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  24 ++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  11 ++
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   8 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  17 +--
 drivers/gpu/drm/i915/i915_drv.h               |  10 --
 drivers/gpu/drm/i915/i915_gem.c               |  17 ---
 drivers/gpu/drm/i915/i915_gem_evict.c         |  14 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
 drivers/gpu/drm/i915/i915_request.c           |  64 +--------
 drivers/gpu/drm/i915/i915_request.h           |   7 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |   9 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |   5 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  10 +-
 21 files changed, 213 insertions(+), 158 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d2b53b5add81..06e1876d0250 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -83,6 +83,7 @@ gt-y += \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
+	gt/intel_gt_requests.o \
 	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_rc6.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 45bbd22c14f1..fd4122d8c0a9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -8,6 +8,7 @@
 #include <linux/sizes.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_gem_gtt.h"
@@ -424,6 +425,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
 static int create_mmap_offset(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_gt *gt = &i915->gt;
 	int err;
 
 	err = drm_gem_create_mmap_offset(&obj->base);
@@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 		return 0;
 
 	/* Attempt to reap some mmap space from dead objects */
-	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 90b211257f2d..9194d8464bf7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -7,31 +7,18 @@
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
 
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	cancel_delayed_work(&i915->gem.retire_work);
-
 	i915_vma_parked(i915);
 
 	i915_globals_park();
 }
 
-static void retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.retire_work.work);
-
-	i915_retire_requests(i915);
-
-	queue_delayed_work(i915->wq,
-			   &i915->gem.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 static int pm_notifier(struct notifier_block *nb,
 		       unsigned long action,
 		       void *data)
@@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb,
 	switch (action) {
 	case INTEL_GT_UNPARK:
 		i915_globals_unpark();
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
 		break;
 
 	case INTEL_GT_PARK:
@@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
@@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(&i915->gt);
+	intel_gt_suspend(&i915->gt);
+	intel_uc_suspend(&i915->gt.uc);
 
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
-
-	intel_uc_suspend(&i915->gt.uc);
-	intel_gt_suspend(&i915->gt);
 }
 
 static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
@@ -239,8 +221,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 void i915_gem_init__pm(struct drm_i915_private *i915)
 {
-	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
-
 	i915->gem.pm_notifier.notifier_call = pm_notifier;
 	blocking_notifier_chain_register(&i915->gt.pm_notifications,
 					 &i915->gem.pm_notifier);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f902aeee1755..2288757808ae 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -8,6 +8,7 @@
 
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
 
@@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm,
 	int err;
 
 	/* Keep in GEM's good graces */
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
@@ -1136,7 +1137,7 @@ __sseu_finish(const char *name,
 		igt_spinner_end(spin);
 
 	if ((flags & TEST_IDLE) && ret == 0) {
-		ret = i915_gem_wait_for_idle(ce->engine->i915,
+		ret = intel_gt_wait_for_idle(ce->engine->gt,
 					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 4ba6ed5c8313..1cd25cfd0246 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -573,7 +573,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
 	intel_gt_pm_get(&i915->gt);
-	cancel_delayed_work_sync(&i915->gem.retire_work);
+	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7205595369be..53220741e49e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -6,6 +6,7 @@
 #include "i915_drv.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_mocs.h"
 #include "intel_rc6.h"
 #include "intel_uncore.h"
@@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
 	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
+	intel_gt_init_requests(gt);
 	intel_gt_pm_init_early(gt);
 	intel_uc_init_early(&gt->uc);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index bdb34f03ec47..d2e80ba64d69 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -10,6 +10,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_pm.h"
 #include "intel_rc6.h"
 #include "intel_wakeref.h"
@@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
 	i915_pmu_gt_unparked(i915);
 
 	intel_gt_queue_hangcheck(gt);
+	intel_gt_unpark_requests(gt);
 
 	pm_notify(gt, INTEL_GT_UNPARK);
 
@@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf)
 	GEM_TRACE("\n");
 
 	pm_notify(gt, INTEL_GT_PARK);
+	intel_gt_park_requests(gt);
 
 	i915_pmu_gt_parked(i915);
 	if (INTEL_GEN(i915) >= 6)
@@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..8aed89fd2cdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static void retire_requests(struct intel_timeline *tl)
+{
+	struct i915_request *rq, *rn;
+
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
+		if (!i915_request_retire(rq))
+			break;
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+	struct intel_gt_timelines *timelines = &gt->timelines;
+	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
+	unsigned long flags;
+	bool interruptible;
+	LIST_HEAD(free);
+
+	interruptible = true;
+	if (unlikely(timeout < 0))
+		timeout = -timeout, interruptible = false;
+
+	spin_lock_irqsave(&timelines->lock, flags);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock_irqrestore(&timelines->lock, flags);
+
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 true,
+								 interruptible,
+				dma_fence_put(fence);
+			}
+		}
+
+		retire_requests(tl);
+
+		spin_lock_irqsave(&timelines->lock, flags);
+
+		/* Resume iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (--tl->active_count)
+			active_count += !!rcu_access_pointer(tl->last_request.fence);
+		else
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
+	}
+	spin_unlock_irqrestore(&timelines->lock, flags);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
+
+	return active_count ? timeout : 0;
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+	/* If the device is asleep, we have no requests outstanding */
+	if (!intel_gt_pm_is_awake(gt))
+		return 0;
+
+	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct intel_gt *gt =
+		container_of(work, typeof(*gt), requests.retire_work.work);
+
+	intel_gt_retire_requests(gt);
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+	INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+	cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..bd31cbce47e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_gt;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+	intel_gt_retire_requests_timeout(gt, 0);
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 7134f1319bbe..802f516a3430 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,17 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
+	struct intel_gt_requests {
+		/**
+		 * We leave the user IRQ off as much as possible,
+		 * but this means that requests will finish and never
+		 * be retired once the system goes idle. Set a timer to
+		 * fire periodically while the ring is running. When it
+		 * fires, go retire requests.
+		 */
+		struct delayed_work retire_work;
+	} requests;
+
 	struct intel_wakeref wakeref;
 	atomic_t user_wakeref;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 16abfabf08c7..d6df40cdc8a6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -8,6 +8,7 @@
 
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
+#include "intel_gt_requests.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
@@ -641,6 +642,7 @@ static int live_hwsp_alternate(void *arg)
 static int live_hwsp_wrap(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = &i915->gt;
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
@@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	tl = intel_timeline_create(&i915->gt, NULL);
+	tl = intel_timeline_create(gt, NULL);
 	if (IS_ERR(tl))
 		return PTR_ERR(tl);
 
@@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg)
 	if (err)
 		goto out_free;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt->i915, id) {
 		const u32 *hwsp_seqno[2];
 		struct i915_request *rq;
 		u32 seqno[2];
@@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 
-		i915_retire_requests(i915); /* recycle HWSP */
+		intel_gt_retire_requests(gt); /* recycle HWSP */
 	}
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5888a658e2b7..2afc41e43b6e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -41,6 +41,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "gt/intel_rc6.h"
 #include "gt/uc/intel_guc_submission.h"
@@ -3621,33 +3622,33 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	struct intel_gt *gt = &i915->gt;
 	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
 
 	if (val & DROP_RESET_ACTIVE &&
-	    wait_for(intel_engines_are_idle(&i915->gt),
-		     I915_IDLE_ENGINES_TIMEOUT))
-		intel_gt_set_wedged(&i915->gt);
+	    wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT))
+		intel_gt_set_wedged(gt);
 
 	if (val & DROP_RETIRE)
-		i915_retire_requests(i915);
+		intel_gt_retire_requests(gt);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+		ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
 
 	if (val & DROP_IDLE) {
-		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		ret = intel_gt_pm_wait_for_idle(gt);
 		if (ret)
 			return ret;
 	}
 
-	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
-		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
+	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
+		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44f3463ff9f1..cb63b2bd0ce8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1710,15 +1710,6 @@ struct drm_i915_private {
 
 	struct {
 		struct notifier_block pm_notifier;
-
-		/**
-		 * We leave the user IRQ off as much as possible,
-		 * but this means that requests will finish and never
-		 * be retired once the system goes idle. Set a timer to
-		 * fire periodically while the ring is running. When it
-		 * fires, go retire requests.
-		 */
-		struct delayed_work retire_work;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
@@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7c82fc39f655..5a664bdead8c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
-{
-	struct intel_gt *gt = &i915->gt;
-
-	/* If the device is asleep, we have no requests outstanding */
-	if (!intel_gt_pm_is_awake(gt))
-		return 0;
-
-	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
-		cond_resched();
-		if (signal_pending(current))
-			return -EINTR;
-	}
-
-	return timeout;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0a412f6d01d7..7e62c310290f 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -29,6 +29,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_trace.h"
@@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
-static int ggtt_flush(struct drm_i915_private *i915)
+static int ggtt_flush(struct intel_gt *gt)
 {
 	/*
 	 * Not everything in the GGTT is tracked via vma (otherwise we
@@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
 			 u64 start, u64 end,
 			 unsigned flags)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
@@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 search_again:
 	active = NULL;
@@ -197,7 +197,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
 		return -EBUSY;
 
-	ret = ggtt_flush(dev_priv);
+	ret = ggtt_flush(vm->gt);
 	if (ret)
 		return ret;
 
@@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
@@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	 * switch otherwise is ineffective.
 	 */
 	if (i915_is_ggtt(vm)) {
-		ret = ggtt_flush(vm->i915);
+		ret = ggtt_flush(vm->gt);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 082fcf9085a6..1d26634ca597 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -38,6 +38,7 @@
 
 #include "display/intel_frontbuffer.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 
 	if (unlikely(ggtt->do_idle_maps)) {
 		/* XXX This does not prevent more requests being submitted! */
-		if (i915_retire_requests_timeout(dev_priv,
-						 -MAX_SCHEDULE_TIMEOUT)) {
+		if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
+						     -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 52f7c4e5b644..437f9fc6282e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq)
 	spin_unlock(&locked->active.lock);
 }
 
-static bool i915_request_retire(struct i915_request *rq)
+bool i915_request_retire(struct i915_request *rq)
 {
 	if (!i915_request_completed(rq))
 		return false;
@@ -1508,68 +1508,6 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl, *tn;
-	unsigned long active_count = 0;
-	unsigned long flags;
-	bool interruptible;
-	LIST_HEAD(free);
-
-	interruptible = true;
-	if (timeout < 0)
-		timeout = -timeout, interruptible = false;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
-		if (!mutex_trylock(&tl->mutex))
-			continue;
-
-		intel_timeline_get(tl);
-		GEM_BUG_ON(!tl->active_count);
-		tl->active_count++; /* pin the list element */
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (timeout > 0) {
-			struct dma_fence *fence;
-
-			fence = i915_active_fence_get(&tl->last_request);
-			if (fence) {
-				timeout = dma_fence_wait_timeout(fence,
-								 interruptible,
-								 timeout);
-				dma_fence_put(fence);
-			}
-		}
-
-		retire_requests(tl);
-
-		spin_lock_irqsave(&timelines->lock, flags);
-
-		/* Resume iteration after dropping lock */
-		list_safe_reset_next(tl, tn, link);
-		if (--tl->active_count)
-			active_count += !!rcu_access_pointer(tl->last_request.fence);
-		else
-			list_del(&tl->link);
-
-		mutex_unlock(&tl->mutex);
-
-		/* Defer the final release to after the spinlock */
-		if (refcount_dec_and_test(&tl->kref.refcount)) {
-			GEM_BUG_ON(tl->active_count);
-			list_add(&tl->link, &free);
-		}
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	list_for_each_entry_safe(tl, tn, &free, link)
-		__intel_timeline_free(&tl->kref);
-
-	return active_count ? timeout : 0;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_request.c"
 #include "selftests/i915_request.c"
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 256b0715180f..6a95242b280d 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request);
 void __i915_request_queue(struct i915_request *rq,
 			  const struct i915_sched_attr *attr);
 
+bool i915_request_retire(struct i915_request *rq);
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
@@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
-long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
-static inline void i915_retire_requests(struct drm_i915_private *i915)
-{
-	i915_retire_requests_timeout(i915, 0);
-}
-
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index ed496bd6d84f..7b0939e3f007 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -4,8 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "gem/i915_gem_context.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
@@ -14,11 +14,12 @@
 
 int igt_flush_test(struct drm_i915_private *i915)
 {
-	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
+	struct intel_gt *gt = &i915->gt;
+	int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
 
 	cond_resched();
 
-	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 			  __builtin_return_address(0));
 		GEM_TRACE_DUMP();
 
-		intel_gt_set_wedged(&i915->gt);
+		intel_gt_set_wedged(gt);
 		ret = -EIO;
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index eae90f97df6c..810b60100c2c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -4,7 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_drv.h"
+#include "i915_drv.h"
+#include "gt/intel_gt_requests.h"
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
@@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 3b589bbb2c2d..4e6cde0d4859 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -26,6 +26,7 @@
 #include <linux/pm_runtime.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/mock_engine.h"
 
 #include "mock_request.h"
@@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
+	} while (intel_gt_retire_requests_timeout(&i915->gt,
+						  MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -98,10 +100,6 @@ static void release_dev(struct device *dev)
 	kfree(pdev);
 }
 
-static void mock_retire_work_handler(struct work_struct *work)
-{
-}
-
 static int pm_domain_resume(struct device *dev)
 {
 	return pm_generic_runtime_resume(dev);
@@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_init_contexts(i915);
 
-	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
-
 	intel_timelines_init(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
-- 
2.23.0


* [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (12 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 16/30] drm/i915: Expose engine properties via sysfs Chris Wilson
                   ` (20 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

As our global unpark/park keep track of the number of active users, we
can simply move the accounting from the GEM layer to the base GT layer.
It was originally placed inside GEM to benefit from the extra 100ms
delay on idleness, but that has been eliminated and now there is no
substantive difference between the layers. In moving it, we move another
piece of the puzzle out from underneath struct_mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c | 11 +----------
 drivers/gpu/drm/i915/gt/intel_gt_pm.c  |  5 +++++
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 9194d8464bf7..7c316d4633db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -10,14 +10,6 @@
 #include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
-#include "i915_globals.h"
-
-static void i915_gem_park(struct drm_i915_private *i915)
-{
-	i915_vma_parked(i915);
-
-	i915_globals_park();
-}
 
 static int pm_notifier(struct notifier_block *nb,
 		       unsigned long action,
@@ -28,11 +20,10 @@ static int pm_notifier(struct notifier_block *nb,
 
 	switch (action) {
 	case INTEL_GT_UNPARK:
-		i915_globals_unpark();
 		break;
 
 	case INTEL_GT_PARK:
-		i915_gem_park(i915);
+		i915_vma_parked(i915);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d2e80ba64d69..b52e2ba3d092 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_globals.h"
 #include "i915_params.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -27,6 +28,8 @@ static int __gt_unpark(struct intel_wakeref *wf)
 
 	GEM_TRACE("\n");
 
+	i915_globals_unpark();
+
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
 	 * when there are no connected displays (no active power domains) during
@@ -78,6 +81,8 @@ static int __gt_park(struct intel_wakeref *wf)
 	GEM_BUG_ON(!wakeref);
 	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 
+	i915_globals_park();
+
 	return 0;
 }
 
-- 
2.23.0


* [PATCH 16/30] drm/i915: Expose engine properties via sysfs
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (13 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 17/30] drm/i915/execlists: Force preemption Chris Wilson
                   ` (19 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
that we can expose properties on each engine to the sysadmin.

To start with we have basic analogues of the i915_query ioctl so that we
can pretty print engine discovery from the shell, and flesh out the
directory structure. Later we will add writeable sysadmin properties such
as per-engine timeout controls.

An example tree of the engine properties on Braswell:
    /sys/class/drm/card0
    └── engine
        ├── bcs0
        │   ├── class
        │   ├── heartbeat_interval_ms
        │   ├── instance
        │   ├── mmio_base
        │   └── name
        ├── rcs0
        │   ├── class
        │   ├── heartbeat_interval_ms
        │   ├── instance
        │   ├── mmio_base
        │   └── name
        ├── vcs0
        │   ├── class
        │   ├── heartbeat_interval_ms
        │   ├── instance
        │   ├── mmio_base
        │   └── name
        └── vecs0
            ├── class
            ├── heartbeat_interval_ms
            ├── instance
            ├── mmio_base
            └── name
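
For illustration, a minimal userspace sketch (not part of this patch)
that reads the read-only properties back; the card0/rcs0 path is an
assumption, a real consumer would enumerate /sys/class/drm/card*/engine/*:

    #include <stdio.h>

    int main(void)
    {
            const char *base = "/sys/class/drm/card0/engine/rcs0";
            const char *props[] = { "name", "class", "instance", "mmio_base" };
            char path[256], buf[64];
            unsigned int i;

            for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
                    FILE *f;

                    snprintf(path, sizeof(path), "%s/%s", base, props[i]);
                    f = fopen(path, "r");
                    if (!f)
                            continue;

                    /* each attribute is a single '\n'-terminated value */
                    if (fgets(buf, sizeof(buf), f))
                            printf("%s: %s", props[i], buf);
                    fclose(f);
            }

            return 0;
    }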

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/Makefile                |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 119 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h |  14 +++
 drivers/gpu/drm/i915/i915_sysfs.c            |   3 +
 4 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 06e1876d0250..1f87c70a2842 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,8 +76,9 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
-	gt/intel_engine_pool.o \
 	gt/intel_engine_pm.o \
+	gt/intel_engine_pool.o \
+	gt/intel_engine_sysfs.o \
 	gt/intel_engine_user.o \
 	gt/intel_gt.o \
 	gt/intel_gt_irq.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
new file mode 100644
index 000000000000..cbe9ec59beeb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -0,0 +1,119 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_sysfs.h"
+
+struct kobj_engine {
+	struct kobject base;
+	struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static ssize_t
+mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
+}
+
+static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
+static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL);
+static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL);
+static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL);
+
+static void kobj_engine_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static struct kobj_type kobj_engine_type = {
+	.release = kobj_engine_release,
+	.sysfs_ops = &kobj_sysfs_ops
+};
+
+static struct kobject *
+kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
+{
+	struct kobj_engine *ke;
+
+	ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+	if (!ke)
+		return NULL;
+
+	kobject_init(&ke->base, &kobj_engine_type);
+	ke->engine = engine;
+
+	if (kobject_add(&ke->base, dir, "%s", engine->name)) {
+		kobject_put(&ke->base);
+		return NULL;
+	}
+
+	/* xfer ownership to sysfs tree */
+	return &ke->base;
+}
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915)
+{
+	static const struct attribute *files[] = {
+		&name_attr.attr,
+		&class_attr.attr,
+		&inst_attr.attr,
+		&mmio_attr.attr,
+		NULL
+	};
+
+	struct device *kdev = i915->drm.primary->kdev;
+	struct intel_engine_cs *engine;
+	struct kobject *dir;
+
+	dir = kobject_create_and_add("engine", &kdev->kobj);
+	if (!dir)
+		return;
+
+	for_each_uabi_engine(engine, i915) {
+		struct kobject *kobj;
+
+		kobj = kobj_engine(dir, engine);
+		if (!kobj)
+			goto err_engine;
+
+		if (sysfs_create_files(kobj, files))
+			goto err_engine;
+
+		if (0) {
+err_engine:
+			dev_err(kdev, "Failed to add sysfs engine '%s'\n",
+				engine->name);
+			break;
+		}
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.h b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.h
new file mode 100644
index 000000000000..ef44a745b70a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_SYSFS_H
+#define INTEL_ENGINE_SYSFS_H
+
+struct drm_i915_private;
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_SYSFS_H */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 034b8abc5062..da0cfcc65802 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -30,6 +30,7 @@
 #include <linux/stat.h>
 #include <linux/sysfs.h>
 
+#include "gt/intel_engine_sysfs.h"
 #include "gt/intel_rc6.h"
 
 #include "i915_drv.h"
@@ -620,6 +621,8 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
 	i915_setup_error_capture(kdev);
+
+	intel_engines_add_sysfs(dev_priv);
 }
 
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
-- 
2.23.0


* [PATCH 17/30] drm/i915/execlists: Force preemption
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (14 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 16/30] drm/i915: Expose engine properties via sysfs Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 18/30] drm/i915: Mark up "sentinel" requests Chris Wilson
                   ` (18 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

If the preempted context takes too long to relinquish control, e.g. it
is stuck inside a shader with arbitration disabled, evict that context
with an engine reset. This ensures that preemptions are reasonably
responsive, providing a tighter QoS for the more important context at
the cost of flagging unresponsive contexts more frequently (i.e. instead
of using an ~10s hangcheck, we now evict at ~100ms). The challenge
lies in picking a timeout that can be reasonably serviced by HW for
typical workloads, balancing the existing clients against the needs for
responsiveness.

Note that coupled with timeslicing, this will lead to rapid GPU "hang"
detection with multiple active contexts vying for GPU time.

The preempt timeout can be adjusted per-engine using,

	/sys/class/drm/card0/engine/*/preempt_timeout_ms
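
As a hedged sketch of the intended tuning flow (the helper name is made
up for illustration; assumes card0 and root privileges):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* e.g. set_preempt_timeout_ms("rcs0", "50"); returns 0 on success */
    static int set_preempt_timeout_ms(const char *engine, const char *ms)
    {
            char path[256];
            int fd, ret;

            snprintf(path, sizeof(path),
                     "/sys/class/drm/card0/engine/%s/preempt_timeout_ms",
                     engine);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;

            /* writing "0" disables the timeout entirely */
            ret = write(fd, ms, strlen(ms)) == (ssize_t)strlen(ms) ? 0 : -1;
            close(fd);
            return ret;
    }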

v2: Couple in sysfs control of preemption timeout

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile         | 15 ++++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 32 +++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 90 ++++++++++++++++++--
 drivers/gpu/drm/i915/i915_params.h           |  2 +-
 6 files changed, 141 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 48df8889a88a..3db4ca5492a9 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,3 +25,18 @@ config DRM_I915_SPIN_REQUEST
 	  May be 0 to disable the initial spin. In practice, we estimate
 	  the cost of enabling the interrupt (if currently disabled) to be
 	  a few microseconds.
+
+config DRM_I915_PREEMPT_TIMEOUT
+	int "Preempt timeout (ms)"
+	default 100 # milliseconds
+	help
+	  How long to wait (in milliseconds) for a preemption event to occur
+	  when submitting a new context via execlists. If the current context
+	  does not hit an arbitration point and yield to HW before the timer
+	  expires, the HW will be reset to allow the more important context
+	  to execute.
+
+	  This is adjustable via
+	  /sys/class/drm/card0/engine/*/preempt_timeout_ms
+
+	  May be 0 to disable the timeout.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 80fd072ac719..dfdffaf35678 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -304,6 +304,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
 
+	engine->props.preempt_timeout = CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+
 	/*
 	 * To be overridden by the backend on setup. However to facilitate
 	 * cleanup on error during setup, we always provide the destroy vfunc.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index cbe9ec59beeb..aac26097c916 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -45,10 +45,37 @@ mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 	return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
 }
 
+static ssize_t
+preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+		     char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.preempt_timeout);
+}
+
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long timeout;
+	int err;
+
+	err = kstrtoul(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	engine->props.preempt_timeout = timeout;
+	return count;
+}
+
 static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
 static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL);
 static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL);
 static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL);
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_ms, 0600, preempt_timeout_show, preempt_timeout_store);
 
 static void kobj_engine_release(struct kobject *kobj)
 {
@@ -109,6 +136,11 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		if (sysfs_create_files(kobj, files))
 			goto err_engine;
 
+		if (CONFIG_DRM_I915_PREEMPT_TIMEOUT &&
+		    intel_engine_has_preemption(engine) &&
+		    sysfs_create_file(kobj, &preempt_timeout_attr.attr))
+			goto err_engine;
+
 		if (0) {
 err_engine:
 			dev_err(kdev, "Failed to add sysfs engine '%s'\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 943f0663837e..61d271668948 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -173,6 +173,11 @@ struct intel_engine_execlists {
 	 */
 	struct timer_list timer;
 
+	/**
+	 * @preempt: reset the current context if it fails to give way
+	 */
+	struct timer_list preempt;
+
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
 	 */
@@ -539,6 +544,10 @@ struct intel_engine_cs {
 		 */
 		ktime_t total;
 	} stats;
+
+	struct {
+		unsigned long preempt_timeout;
+	} props;
 };
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 431d3b8c3371..7741d78139ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1394,6 +1394,29 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
 }
 
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+	unsigned long timeout;
+
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return;
+
+	timeout = READ_ONCE(engine->props.preempt_timeout);
+	if (!timeout)
+		return;
+
+	timeout = msecs_to_jiffies_timeout(timeout);
+	/*
+	 * Paranoia to make sure the compiler computes the timeout before
+	 * loading 'jiffies' as jiffies is volatile and may be updated in
+	 * the background by a timer tick. All to reduce the complexity
+	 * of the addition and reduce the risk of losing a jiffie.
+	 */
+	barrier();
+
+	mod_timer(&engine->execlists.preempt, jiffies + timeout);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1743,6 +1766,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists->switch_priority_hint =
 			switch_prio(engine, *execlists->pending);
 		execlists_submit_ports(engine);
+		set_preempt_timeout(engine);
 	} else {
 		ring_set_paused(engine, 0);
 	}
@@ -1970,6 +1994,38 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 		execlists_dequeue(engine);
 }
 
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->gt->reset.flags;
+
+	if (i915_modparams.reset < 3)
+		return;
+
+	if (test_and_set_bit(bit, lock))
+		return;
+
+	/* Mark this tasklet as disabled to avoid waiting for it to complete */
+	tasklet_disable_nosync(&engine->execlists.tasklet);
+
+	intel_engine_reset(engine, "preemption time out");
+
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(struct intel_engine_cs *const engine)
+{
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return false;
+
+	if (!intel_engine_has_preemption(engine))
+		return false;
+
+	return !timer_pending(&engine->execlists.preempt) &&
+		READ_ONCE(engine->execlists.pending[0]);
+}
+
 /*
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -1977,23 +2033,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
-	unsigned long flags;
+	bool timeout = preempt_timeout(engine);
 
 	process_csb(engine);
-	if (!READ_ONCE(engine->execlists.pending[0])) {
+	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+		unsigned long flags;
+
 		spin_lock_irqsave(&engine->active.lock, flags);
 		__execlists_submission_tasklet(engine);
 		spin_unlock_irqrestore(&engine->active.lock, flags);
+
+		/* Recheck after serialising with direct-submission */
+		if (timeout && preempt_timeout(engine))
+			preempt_reset(engine);
 	}
 }
 
-static void execlists_submission_timer(struct timer_list *timer)
+static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
-	struct intel_engine_cs *engine =
-		from_timer(engine, timer, execlists.timer);
-
 	/* Kick the tasklet for some interrupt coalescing and reset handling */
-	tasklet_hi_schedule(&engine->execlists.tasklet);
+	tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+	execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+	execlists_kick(timer, preempt);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -3417,6 +3489,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 static void execlists_park(struct intel_engine_cs *engine)
 {
 	del_timer(&engine->execlists.timer);
+	del_timer(&engine->execlists.preempt);
 }
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -3534,7 +3607,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 {
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
-	timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
+	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index d29ade3b7de6..56058978bb27 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -61,7 +61,7 @@ struct drm_printer;
 	param(char *, dmc_firmware_path, NULL) \
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \
 	param(int, edp_vswing, 0) \
-	param(int, reset, 2) \
+	param(int, reset, 3) \
 	param(unsigned int, inject_load_failure, 0) \
 	param(int, fastboot, -1) \
 	param(int, enable_dpcd_backlight, 0) \
-- 
2.23.0


* [PATCH 18/30] drm/i915: Mark up "sentinel" requests
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (15 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 17/30] drm/i915/execlists: Force preemption Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
                   ` (17 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Sometimes we want to emit a terminator request, a request that flushes
the pipeline and allows no request to come after it. This can be used
for a "preempt-to-idle" to ensure that upon processing the
context-switch to that request, all other active contexts have been
flushed.
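
A sketch of the intended usage, assuming an already pinned kernel
context (the preemptive pulse in a later patch is the real consumer):

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* nothing may be coalesced alongside or submitted after a sentinel */
	rq->flags |= I915_REQUEST_SENTINEL;
	i915_request_add(rq);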

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c |  6 ++++++
 drivers/gpu/drm/i915/i915_request.h | 10 ++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7741d78139ab..52e5e4de1fc6 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1233,6 +1233,9 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
+	if (i915_request_has_sentinel(prev))
+		return false;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
@@ -1704,6 +1707,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->hw_context == rq->hw_context)
 					goto done;
 
+				if (i915_request_has_sentinel(last))
+					goto done;
+
 				/*
 				 * If GVT overrides us we only ever submit
 				 * port[0], leaving port[1] empty. Note that we
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 6a95242b280d..96991d64759c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,8 +216,9 @@ struct i915_request {
 	unsigned long emitted_jiffies;
 
 	unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_WAITBOOST	BIT(0)
+#define I915_REQUEST_NOPREEMPT	BIT(1)
+#define I915_REQUEST_SENTINEL	BIT(2)
 
 	/** timeline->request entry for this request */
 	struct list_head link;
@@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+	return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
2.23.0


* [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (16 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 18/30] drm/i915: Mark up "sentinel" requests Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
                   ` (16 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

On completion of a banned context, scrub the context image so that we do
not replay the active payload. The intent is that we skip banned
payloads on request submission so that the timeline advancement
continues on in the background. However, if we are returning to a
preempted request, i915_request_skip() is ineffective and instead we
need to patch up the context image so that it continues from the start
of the next request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 58 +++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 52e5e4de1fc6..6dcceaf02e00 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     const struct intel_engine_cs *engine,
 				     const struct intel_ring *ring,
 				     bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+			     const struct intel_engine_cs *engine);
 
 static void __context_pin_acquire(struct intel_context *ce)
 {
@@ -1022,6 +1025,58 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 		tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static void
+mark_complete(struct i915_request *rq, struct intel_engine_cs *engine)
+{
+	const struct intel_timeline * const tl = rcu_dereference(rq->timeline);
+
+	*(u32 *)tl->hwsp_seqno = rq->fence.seqno;
+	GEM_BUG_ON(!i915_request_completed(rq));
+
+	list_for_each_entry_from_reverse(rq, &tl->requests, link) {
+		if (i915_request_signaled(rq))
+			break;
+
+		mark_eio(rq);
+	}
+
+	intel_engine_queue_breadcrumbs(engine);
+}
+
+static void cancel_active(struct i915_request *rq,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context * const ce = rq->hw_context;
+	u32 *regs = ce->lrc_reg_state;
+
+	if (i915_request_completed(rq))
+		return;
+
+	GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+		  __func__, engine->name, rq->fence.context, rq->fence.seqno);
+	__context_pin_acquire(ce);
+
+	/* Scrub the context image to prevent replaying the previous batch */
+	memcpy(regs, /* skip restoring the vanilla PPHWSP */
+	       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+	       engine->context_size - PAGE_SIZE);
+	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+
+	/* Ring will be advanced on retire; here we need to reset the context */
+	ce->ring->head = intel_ring_wrap(ce->ring, rq->wa_tail);
+	__execlists_update_reg_state(ce, engine);
+
+	/* We've switched away, so this should be a no-op, but intent matters */
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+
+	/* Let everyone know that the request may now be retired */
+	rcu_read_lock();
+	mark_complete(rq, engine);
+	rcu_read_unlock();
+
+	__context_pin_release(ce);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
@@ -1032,6 +1087,9 @@ __execlists_schedule_out(struct i915_request *rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put(engine->gt);
 
+	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+		cancel_active(rq, engine);
+
 	/*
 	 * If this is part of a virtual engine, its next request may
 	 * have been blocked waiting for access to the active context.
-- 
2.23.0


* [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (17 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 13:52   ` Bloomfield, Jon
  2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
  2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
                   ` (15 subsequent siblings)
  34 siblings, 2 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Normally, we rely on our hangcheck to prevent persistent batches from
hogging the GPU. However, if the user disables hangcheck, this mechanism
breaks down. Despite our insistence that this is unsafe, the users are
equally insistent that they want to use endless batches and will disable
the hangcheck mechanism. We are looking at perhaps replacing hangcheck
with a softer mechanism, that sends a pulse down the engine to check if
it is well. We can use the same preemptive pulse to flush an active
persistent context off the GPU upon context close, preventing resources
being lost and unkillable requests remaining on the GPU after process
termination. To avoid changing the ABI and accidentally breaking
existing userspace, we make the persistence of a context explicit and
enable it by default (matching current ABI). Userspace can opt out of
persistent mode (forcing requests to be cancelled when the context is
closed, either explicitly or by process termination) via a context parameter. To
facilitate existing use-cases of disabling hangcheck, if the modparam is
disabled (i915.enable_hangcheck=0), we disable persistence mode by
default.  (Note, one of the outcomes for supporting endless mode will be
the removal of hangchecking, at which point opting into persistent mode
will be mandatory, or perhaps the default, controlled by cgroups.)
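
From userspace, opting out is a single SETPARAM; a sketch against the
uapi addition below (ctx_id and i915_fd assumed to be in scope):

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PERSISTENCE,
		.value = 0, /* cancel our requests when the context closes */
	};

	/* fails with -ENODEV if the engines cannot preempt-to-idle */
	err = ioctl(i915_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);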

Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
 drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
 include/uapi/drm/i915_drm.h                   |  15 +++
 10 files changed, 228 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1f87c70a2842..561fa2bb3006 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,6 +76,7 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
 	gt/intel_engine_pool.o \
 	gt/intel_engine_sysfs.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0ab416887fc2..e832238a72e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
@@ -375,6 +376,78 @@ void i915_gem_context_release(struct kref *ref)
 		queue_work(i915->wq, &i915->contexts.free_work);
 }
 
+static inline struct i915_gem_engines *
+__context_engines_static(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	intel_engine_mask_t tmp, active, reset;
+	struct intel_gt *gt = &ctx->i915->gt;
+	struct i915_gem_engines_iter it;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	/*
+	 * If we are already banned, it was due to a guilty request causing
+	 * a reset and the entire context being evicted from the GPU.
+	 */
+	if (i915_gem_context_is_banned(ctx))
+		return;
+
+	i915_gem_context_set_banned(ctx);
+
+	/*
+	 * Map the user's engine back to the actual engines; one virtual
+	 * engine will be mapped to multiple engines, and using ctx->engine[]
+	 * the same engine may have multiple instances in the user's map.
+	 * However, we only care about pending requests, so only include
+	 * engines on which there are incomplete requests.
+	 */
+	active = 0;
+	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+		struct dma_fence *fence;
+
+		if (!ce->timeline)
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (!fence)
+			continue;
+
+		engine = to_request(fence)->engine;
+		if (HAS_EXECLISTS(gt->i915))
+			engine = intel_context_inflight(ce);
+		if (engine)
+			active |= engine->mask;
+
+		dma_fence_put(fence);
+	}
+
+	/*
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 */
+	reset = 0;
+	for_each_engine_masked(engine, gt->i915, active, tmp)
+		if (intel_engine_pulse(engine))
+			reset |= engine->mask;
+
+	/*
+	 * If we are unable to send a preemptive pulse to bump
+	 * the context from the GPU, we have to resort to a full
+	 * reset. We hope the collateral damage is worth it.
+	 */
+	if (reset)
+		intel_gt_handle_error(gt, reset, 0,
+				      "context closure in %s", ctx->name);
+}
+
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -400,6 +473,10 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
+
+	if (!i915_gem_context_is_persistent(ctx))
+		kill_context(ctx);
+
 	i915_gem_context_put(ctx);
 }
 
@@ -440,6 +517,21 @@ __create_context(struct drm_i915_private *i915)
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
 
+	/*
+	 * If the user has disabled hangchecking, we can not be sure that
+	 * the batches will ever complete after the context is closed,
+	 * keep the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 *
+	 * Note that the user may change the value of the modparam between
+	 * context creation and close, we choose to ignore this for the
+	 * sake of determinism and expect the user to set the parameter
+	 * on module load and never touch it again.
+	 */
+	if (i915_modparams.enable_hangcheck) /* cgroup hook? */
+		i915_gem_context_set_persistence(ctx);
+
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
@@ -597,6 +689,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -1724,6 +1817,26 @@ get_engines(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int
+set_persistence(struct i915_gem_context *ctx,
+		const struct drm_i915_gem_context_param *args)
+{
+	if (args->size)
+		return -EINVAL;
+
+	if (args->value) {
+		i915_gem_context_set_persistence(ctx);
+		return 0;
+	}
+
+	/* To cancel a context we use "preempt-to-idle" */
+	if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+		return -ENODEV;
+
+	i915_gem_context_clear_persistence(ctx);
+	return 0;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1801,6 +1914,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		ret = set_persistence(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2250,6 +2367,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		args->size = 0;
+		args->value = i915_gem_context_is_persistent(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..e0f5b6c6a331 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -74,6 +74,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..daf1ea5075a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -137,6 +137,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
 #define UCONTEXT_RECOVERABLE		3
+#define UCONTEXT_PERSISTENCE		4
 
 	/**
 	 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..0d1a275ec316 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	i915_gem_context_set_persistence(ctx);
+
 	mutex_init(&ctx->engines_mutex);
 	e = default_engines(ctx);
 	if (IS_ERR(e))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..2fc413f9d506
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+	i915_request_add_active_barriers(rq);
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (!intel_engine_has_preemption(engine))
+		return -ENODEV;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	if (mutex_lock_interruptible(&ce->timeline->mutex))
+		goto out_rpm;
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unlock;
+	}
+
+	rq->flags |= I915_REQUEST_SENTINEL;
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+out_unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+	intel_engine_pm_put(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..b950451b5998
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8e5e513eddc9..86c0b2280b7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
 	/* Release our exclusive hold on the engine */
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 21037a2e2038..ae8bb3cb627e 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -39,6 +39,7 @@ enum {
  * active request.
  */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..eb9e704d717a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1565,6 +1565,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.23.0


* [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (18 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 13:55   ` Bloomfield, Jon
  2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
                   ` (14 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Replace sampling the engine state every so often with a periodic
heartbeat request to measure the health of an engine. This is coupled
with forced preemption to allow long-running requests to survive so
long as they do not block other users.

The heartbeat interval can be adjusted per-engine using,

	/sys/class/drm/card0/engine/*/heartbeat_interval_ms
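
As a hedged userspace sketch (needs root), disabling the heartbeat, and
with it automatic hang detection, is a matter of writing 0 to each node:

    #include <glob.h>
    #include <stdio.h>

    int main(void)
    {
            glob_t g;
            size_t i;

            if (glob("/sys/class/drm/card*/engine/*/heartbeat_interval_ms",
                     0, NULL, &g))
                    return 1;

            for (i = 0; i < g.gl_pathc; i++) {
                    FILE *f = fopen(g.gl_pathv[i], "w");

                    if (f) {
                            fputs("0", f); /* 0 disables the heartbeat */
                            fclose(f);
                    }
            }

            globfree(&g);
            return 0;
    }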

v2: Couple in sysfs controls

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile          |  14 +
 drivers/gpu/drm/i915/Makefile                 |   1 -
 drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   2 -
 drivers/gpu/drm/i915/gt/intel_engine.h        |  32 --
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  11 +-
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 115 ++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |   5 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c  |  29 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  17 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   1 -
 drivers/gpu/drm/i915/gt/intel_gt.h            |   4 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   9 -
 drivers/gpu/drm/i915/gt/intel_hangcheck.c     | 361 ------------------
 drivers/gpu/drm/i915/gt/intel_reset.c         |   3 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   4 -
 drivers/gpu/drm/i915/i915_debugfs.c           |  87 -----
 drivers/gpu/drm/i915/i915_drv.c               |   3 -
 drivers/gpu/drm/i915/i915_drv.h               |   1 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |  33 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   2 -
 drivers/gpu/drm/i915/i915_priolist_types.h    |   6 +
 25 files changed, 194 insertions(+), 555 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/gt/intel_hangcheck.c

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 3db4ca5492a9..87f2b063b1f3 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -40,3 +40,17 @@ config DRM_I915_PREEMPT_TIMEOUT
 	  /sys/class/drm/card0/engine/*/preempt_timeout_ms
 
 	  May be 0 to disable the timeout.
+
+config DRM_I915_HEARTBEAT_INTERVAL
+	int "Interval between heartbeat pulses (ms)"
+	default 2500 # milliseconds
+	help
+	  While active, the driver uses a periodic request, a heartbeat, to
+	  check the wellness of the GPU and to regularly flush state changes
+	  (idle barriers).
+
+	  This is adjustable via
+	  /sys/class/drm/card0/engine/*/heartbeat_interval_ms
+
+	  May be 0 to disable heartbeats and therefore disable automatic GPU
+	  hang detection.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 561fa2bb3006..eab4a75b5255 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -86,7 +86,6 @@ gt-y += \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
 	gt/intel_gt_requests.o \
-	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_rc6.o \
 	gt/intel_renderstate.o \
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c64c7045de28..cbc525103c69 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14342,7 +14342,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
 static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 {
 	struct i915_sched_attr attr = {
-		.priority = I915_PRIORITY_DISPLAY,
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
 	};
 
 	i915_gem_object_wait_priority(obj, 0, &attr);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a70fbbd8c98d..74d59e5404b4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -435,6 +435,5 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 7c316d4633db..b32088a93232 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -100,8 +100,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_gt_suspend(&i915->gt);
 	intel_uc_suspend(&i915->gt.uc);
 
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
-
 	i915_gem_drain_freed_objects(i915);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c9e8c8ccbd47..dcd1cb5efaa7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -89,38 +89,6 @@ struct drm_printer;
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-enum intel_engine_hangcheck_action {
-	ENGINE_IDLE = 0,
-	ENGINE_WAIT,
-	ENGINE_ACTIVE_SEQNO,
-	ENGINE_ACTIVE_HEAD,
-	ENGINE_ACTIVE_SUBUNITS,
-	ENGINE_WAIT_KICK,
-	ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
-	switch (a) {
-	case ENGINE_IDLE:
-		return "idle";
-	case ENGINE_WAIT:
-		return "wait";
-	case ENGINE_ACTIVE_SEQNO:
-		return "active seqno";
-	case ENGINE_ACTIVE_HEAD:
-		return "active head";
-	case ENGINE_ACTIVE_SUBUNITS:
-		return "active subunits";
-	case ENGINE_WAIT_KICK:
-		return "wait kick";
-	case ENGINE_DEAD:
-		return "dead";
-	}
-
-	return "unknown";
-}
 
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index dfdffaf35678..fcaf5e69467c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -305,6 +305,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	__sprint_engine_name(engine);
 
 	engine->props.preempt_timeout = CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+	engine->props.heartbeat_interval = CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
 
 	/*
 	 * To be overridden by the backend on setup. However to facilitate
@@ -599,7 +600,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
-	intel_engine_init_hangcheck(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
 
@@ -1418,8 +1418,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
-	drm_printf(m, "\tHangcheck: %d ms ago\n",
-		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+
+	rcu_read_lock();
+	rq = READ_ONCE(engine->heartbeat.systole);
+	if (rq)
+		drm_printf(m, "\tHeartbeat: %d ms ago\n",
+			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+	rcu_read_unlock();
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 2fc413f9d506..f68acf9118f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -11,6 +11,27 @@
 #include "intel_engine_pm.h"
 #include "intel_engine.h"
 #include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that it restores progress.
+ */
+
+static void next_heartbeat(struct intel_engine_cs *engine)
+{
+	long delay;
+
+	delay = READ_ONCE(engine->props.heartbeat_interval);
+	if (!delay)
+		return;
+
+	delay = msecs_to_jiffies_timeout(delay);
+	schedule_delayed_work(&engine->heartbeat.work,
+			      round_jiffies_up_relative(delay));
+}
 
 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 {
@@ -18,6 +39,100 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 	i915_request_add_active_barriers(rq);
 }
 
+static void heartbeat(struct work_struct *wrk)
+{
+	struct i915_sched_attr attr = {
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+	};
+	struct intel_engine_cs *engine =
+		container_of(wrk, typeof(*engine), heartbeat.work.work);
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return;
+
+	rq = engine->heartbeat.systole;
+	if (rq && i915_request_completed(rq)) {
+		i915_request_put(rq);
+		engine->heartbeat.systole = NULL;
+	}
+
+	if (intel_gt_is_wedged(engine->gt))
+		goto out;
+
+	if (engine->heartbeat.systole) {
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+			struct drm_printer p = drm_debug_printer(__func__);
+
+			intel_engine_dump(engine, &p,
+					  "%s heartbeat not ticking\n",
+					  engine->name);
+		}
+
+		if (engine->schedule &&
+		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+			attr.priority =
+				I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+			if (rq->sched.attr.priority >= attr.priority)
+				attr.priority = I915_PRIORITY_BARRIER;
+
+			local_bh_disable();
+			engine->schedule(rq, &attr);
+			local_bh_enable();
+		} else {
+			intel_gt_handle_error(engine->gt, engine->mask,
+					      I915_ERROR_CAPTURE,
+					      "stopped heartbeat on %s",
+					      engine->name);
+		}
+		goto out;
+	}
+
+	if (engine->wakeref_serial == engine->serial)
+		goto out;
+
+	mutex_lock(&ce->timeline->mutex);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq))
+		goto unlock;
+
+	idle_pulse(engine, rq);
+	if (i915_modparams.enable_hangcheck)
+		engine->heartbeat.systole = i915_request_get(rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out:
+	next_heartbeat(engine);
+	intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+		return;
+
+	next_heartbeat(engine);
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+	cancel_delayed_work(&engine->heartbeat.work);
+	i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
+}
+
 int intel_engine_pulse(struct intel_engine_cs *engine)
 {
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
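
To summarise the ladder the heartbeat() worker above implements, a
simplified, illustrative model (not driver code): a pulse is emitted at
minimum priority, promoted to I915_PRIORITY_HEARTBEAT on the next beat,
then to I915_PRIORITY_BARRIER, and if it still has not completed by the
following beat the engine is declared hung and reset.

#include <errno.h>
#include <stdbool.h>

enum beat_step { BEAT_NONE, BEAT_MIN, BEAT_HEARTBEAT, BEAT_BARRIER };

/*
 * One tick of the (hypothetical) escalation model: returns 0 while we
 * are still waiting or escalating, -EIO once the engine should be reset.
 */
static int beat_tick(enum beat_step *step, bool pulse_completed)
{
	if (pulse_completed) {
		*step = BEAT_NONE;	/* healthy: start over next interval */
		return 0;
	}

	switch (*step) {
	case BEAT_NONE:
		*step = BEAT_MIN;	/* emit pulse at minimum priority */
		return 0;
	case BEAT_MIN:
		*step = BEAT_HEARTBEAT;	/* ask for preemption */
		return 0;
	case BEAT_HEARTBEAT:
		*step = BEAT_BARRIER;	/* maximum priority, last chance */
		return 0;
	default:
		return -EIO;		/* stuck: declare hang and reset */
	}
}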
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index b950451b5998..39391004554d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -9,6 +9,11 @@
 
 struct intel_engine_cs;
 
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
 int intel_engine_pulse(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 86c0b2280b7b..392dc1b9f04e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 
 #include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_gt.h"
@@ -34,7 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	if (engine->unpark)
 		engine->unpark(engine);
 
-	intel_engine_init_hangcheck(engine);
+	intel_engine_unpark_heartbeat(engine);
 	return 0;
 }
 
@@ -158,6 +159,7 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	call_idle_barriers(engine); /* cleanup after wedging */
 
+	intel_engine_park_heartbeat(engine);
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
@@ -188,6 +190,7 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
 	struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
 
 	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+	intel_engine_init_heartbeat(engine);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index aac26097c916..8532f9cdc885 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -70,12 +70,38 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
 	return count;
 }
 
+static ssize_t
+heartbeat_interval_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.heartbeat_interval);
+}
+
+static ssize_t
+heartbeat_interval_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long delay;
+	int err;
+
+	err = kstrtoul(buf, 0, &delay);
+	if (err)
+		return err;
+
+	engine->props.heartbeat_interval = delay;
+	return count;
+}
+
 static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
 static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL);
 static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL);
 static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL);
 static struct kobj_attribute preempt_timeout_attr =
 __ATTR(preempt_timeout_ms, 0600, preempt_timeout_show, preempt_timeout_store);
+static struct kobj_attribute heartbeat_interval_attr =
+__ATTR(heartbeat_interval_ms, 0600, heartbeat_interval_show, heartbeat_interval_store);
 
 static void kobj_engine_release(struct kobject *kobj)
 {
@@ -115,6 +141,9 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		&class_attr.attr,
 		&inst_attr.attr,
 		&mmio_attr.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+		&heartbeat_interval_attr.attr,
+#endif
 		NULL
 	};
 
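
The new control is exposed per engine as heartbeat_interval_ms. A
minimal userspace sketch for adjusting it follows; the sysfs path is an
assumption for card0's rcs0, adjust for your device and engine:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/class/drm/card0/engine/rcs0/heartbeat_interval_ms";
	const char *val = "2500\n";	/* interval in milliseconds */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, val, strlen(val)) < 0)
		perror("write");
	close(fd);
	return 0;
}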
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 61d271668948..25af66540a1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -15,6 +15,7 @@
 #include <linux/rbtree.h>
 #include <linux/timer.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 #include "i915_gem.h"
 #include "i915_pmu.h"
@@ -76,14 +77,6 @@ struct intel_instdone {
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_engine_hangcheck {
-	u64 acthd;
-	u32 last_ring;
-	u32 last_head;
-	unsigned long action_timestamp;
-	struct intel_instdone instdone;
-};
-
 struct intel_ring {
 	struct kref ref;
 	struct i915_vma *vma;
@@ -328,6 +321,11 @@ struct intel_engine_cs {
 
 	intel_engine_mask_t saturated; /* submitting semaphores too late? */
 
+	struct {
+		struct delayed_work work;
+		struct i915_request *systole;
+	} heartbeat;
+
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
@@ -478,8 +476,6 @@ struct intel_engine_cs {
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
-	struct intel_engine_hangcheck hangcheck;
-
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
@@ -547,6 +543,7 @@ struct intel_engine_cs {
 
 	struct {
 		unsigned long preempt_timeout;
+		unsigned long heartbeat_interval;
 	} props;
 };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 53220741e49e..4e9e45f834f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -22,7 +22,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
-	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
 	intel_gt_init_requests(gt);
 	intel_gt_pm_init_early(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index e6ab0bff0efb..5b6effed3713 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -46,8 +46,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
 void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
 void intel_gt_chipset_flush(struct intel_gt *gt);
 
-void intel_gt_init_hangcheck(struct intel_gt *gt);
-
 static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
 					  enum intel_gt_scratch_field field)
 {
@@ -59,6 +57,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt)
 	return __intel_reset_failed(&gt->reset);
 }
 
-void intel_gt_queue_hangcheck(struct intel_gt *gt);
-
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index b52e2ba3d092..ba31f1c9e0dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -52,7 +52,6 @@ static int __gt_unpark(struct intel_wakeref *wf)
 
 	i915_pmu_gt_unparked(i915);
 
-	intel_gt_queue_hangcheck(gt);
 	intel_gt_unpark_requests(gt);
 
 	pm_notify(gt, INTEL_GT_UNPARK);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..59f8ee0aa151 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -26,14 +26,6 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
-struct intel_hangcheck {
-	/* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
-	struct delayed_work work;
-};
-
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -67,7 +59,6 @@ struct intel_gt {
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
 
-	struct intel_hangcheck hangcheck;
 	struct intel_reset reset;
 
 	/**
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 9814b18b32ad..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "intel_engine.h"
-#include "intel_gt.h"
-#include "intel_reset.h"
-
-struct hangcheck {
-	u64 acthd;
-	u32 ring;
-	u32 head;
-	enum intel_engine_hangcheck_action action;
-	unsigned long action_timestamp;
-	int deadlock;
-	struct intel_instdone instdone;
-	bool wedged:1;
-	bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
-	u32 tmp = current_instdone | *old_instdone;
-	bool unchanged;
-
-	unchanged = tmp == *old_instdone;
-	*old_instdone |= tmp;
-
-	return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	struct intel_instdone instdone;
-	struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
-	bool stuck;
-	int slice;
-	int subslice;
-
-	intel_engine_get_instdone(engine, &instdone);
-
-	/* There might be unstable subunit states even when
-	 * actual head is not moving. Filter out the unstable ones by
-	 * accumulating the undone -> done transitions and only
-	 * consider those as progress.
-	 */
-	stuck = instdone_unchanged(instdone.instdone,
-				   &accu_instdone->instdone);
-	stuck &= instdone_unchanged(instdone.slice_common,
-				    &accu_instdone->slice_common);
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
-		stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
-					    &accu_instdone->sampler[slice][subslice]);
-		stuck &= instdone_unchanged(instdone.row[slice][subslice],
-					    &accu_instdone->row[slice][subslice]);
-	}
-
-	return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	if (acthd != engine->hangcheck.acthd) {
-
-		/* Clear subunit states on head movement */
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		return ENGINE_ACTIVE_HEAD;
-	}
-
-	if (!subunits_stuck(engine))
-		return ENGINE_ACTIVE_SUBUNITS;
-
-	return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	enum intel_engine_hangcheck_action ha;
-	u32 tmp;
-
-	ha = head_stuck(engine, acthd);
-	if (ha != ENGINE_DEAD)
-		return ha;
-
-	if (IS_GEN(engine->i915, 2))
-		return ENGINE_DEAD;
-
-	/* Is the chip hanging on a WAIT_FOR_EVENT?
-	 * If so we can simply poke the RB_WAIT bit
-	 * and break the hang. This should work on
-	 * all but the second generation chipsets.
-	 */
-	tmp = ENGINE_READ(engine, RING_CTL);
-	if (tmp & RING_WAIT) {
-		intel_gt_handle_error(engine->gt, engine->mask, 0,
-				      "stuck wait on %s", engine->name);
-		ENGINE_WRITE(engine, RING_CTL, tmp);
-		return ENGINE_WAIT_KICK;
-	}
-
-	return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
-				  struct hangcheck *hc)
-{
-	hc->acthd = intel_engine_get_active_head(engine);
-	hc->ring = ENGINE_READ(engine, RING_START);
-	hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
-				   const struct hangcheck *hc)
-{
-	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.last_ring = hc->ring;
-	engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
-		     const struct hangcheck *hc)
-{
-	if (intel_engine_is_idle(engine))
-		return ENGINE_IDLE;
-
-	if (engine->hangcheck.last_ring != hc->ring)
-		return ENGINE_ACTIVE_SEQNO;
-
-	if (engine->hangcheck.last_head != hc->head)
-		return ENGINE_ACTIVE_SEQNO;
-
-	return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
-					struct hangcheck *hc)
-{
-	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
-	hc->action = hangcheck_get_action(engine, hc);
-
-	/* We always increment the progress
-	 * if the engine is busy and still processing
-	 * the same request, so that no single request
-	 * can run indefinitely (such as a chain of
-	 * batches). The only time we do not increment
-	 * the hangcheck score on this ring, if this
-	 * engine is in a legitimate wait for another
-	 * engine. In that case the waiting engine is a
-	 * victim and we want to be sure we catch the
-	 * right culprit. Then every time we do kick
-	 * the ring, make it as a progress as the seqno
-	 * advancement might ensure and if not, it
-	 * will catch the hanging engine.
-	 */
-
-	switch (hc->action) {
-	case ENGINE_IDLE:
-	case ENGINE_ACTIVE_SEQNO:
-		/* Clear head and subunit states on seqno movement */
-		hc->acthd = 0;
-
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		/* Intentional fall through */
-	case ENGINE_WAIT_KICK:
-	case ENGINE_WAIT:
-		engine->hangcheck.action_timestamp = jiffies;
-		break;
-
-	case ENGINE_ACTIVE_HEAD:
-	case ENGINE_ACTIVE_SUBUNITS:
-		/*
-		 * Seqno stuck with still active engine gets leeway,
-		 * in hopes that it is just a long shader.
-		 */
-		timeout = I915_SEQNO_DEAD_TIMEOUT;
-		break;
-
-	case ENGINE_DEAD:
-		break;
-
-	default:
-		MISSING_CASE(hc->action);
-	}
-
-	hc->stalled = time_after(jiffies,
-				 engine->hangcheck.action_timestamp + timeout);
-	hc->wedged = time_after(jiffies,
-				 engine->hangcheck.action_timestamp +
-				 I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct intel_gt *gt,
-				   intel_engine_mask_t hung,
-				   intel_engine_mask_t stuck)
-{
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-	char msg[80];
-	int len;
-
-	/* If some rings hung but others were still busy, only
-	 * blame the hanging rings in the synopsis.
-	 */
-	if (stuck != hung)
-		hung &= ~stuck;
-	len = scnprintf(msg, sizeof(msg),
-			"%s on ", stuck == hung ? "no progress" : "hang");
-	for_each_engine_masked(engine, gt->i915, hung, tmp)
-		len += scnprintf(msg + len, sizeof(msg) - len,
-				 "%s, ", engine->name);
-	msg[len-2] = '\0';
-
-	return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void hangcheck_elapsed(struct work_struct *work)
-{
-	struct intel_gt *gt =
-		container_of(work, typeof(*gt), hangcheck.work.work);
-	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-
-	if (!i915_modparams.enable_hangcheck)
-		return;
-
-	if (!READ_ONCE(gt->awake))
-		return;
-
-	if (intel_gt_is_wedged(gt))
-		return;
-
-	wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
-	if (!wakeref)
-		return;
-
-	/* As enabling the GPU requires fairly extensive mmio access,
-	 * periodically arm the mmio checker to see if we are triggering
-	 * any invalid access.
-	 */
-	intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
-
-	for_each_engine(engine, gt->i915, id) {
-		struct hangcheck hc;
-
-		intel_engine_breadcrumbs_irq(engine);
-
-		hangcheck_load_sample(engine, &hc);
-		hangcheck_accumulate_sample(engine, &hc);
-		hangcheck_store_sample(engine, &hc);
-
-		if (hc.stalled) {
-			hung |= engine->mask;
-			if (hc.action != ENGINE_DEAD)
-				stuck |= engine->mask;
-		}
-
-		if (hc.wedged)
-			wedged |= engine->mask;
-	}
-
-	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
-		struct drm_printer p = drm_debug_printer("hangcheck");
-
-		for_each_engine(engine, gt->i915, id) {
-			if (intel_engine_is_idle(engine))
-				continue;
-
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-		}
-	}
-
-	if (wedged) {
-		dev_err(gt->i915->drm.dev,
-			"GPU recovery timed out,"
-			" cancelling all in-flight rendering.\n");
-		GEM_TRACE_DUMP();
-		intel_gt_set_wedged(gt);
-	}
-
-	if (hung)
-		hangcheck_declare_hang(gt, hung, stuck);
-
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
-
-	/* Reset timer in case GPU hangs without another request being added */
-	intel_gt_queue_hangcheck(gt);
-}
-
-void intel_gt_queue_hangcheck(struct intel_gt *gt)
-{
-	unsigned long delay;
-
-	if (unlikely(!i915_modparams.enable_hangcheck))
-		return;
-
-	/*
-	 * Don't continually defer the hangcheck so that it is always run at
-	 * least once after work has been scheduled on any ring. Otherwise,
-	 * we will ignore a hung ring if a second ring is kept busy.
-	 */
-
-	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
-	queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
-	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
-	engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_gt_init_hangcheck(struct intel_gt *gt)
-{
-	INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 055496f0825f..806b668d89aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1018,8 +1018,6 @@ void intel_gt_reset(struct intel_gt *gt,
 	if (ret)
 		goto taint;
 
-	intel_gt_queue_hangcheck(gt);
-
 finish:
 	reset_finish(gt, awake);
 unlock:
@@ -1351,4 +1349,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_reset.c"
+#include "selftest_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index ffbb3d23b887..b0ca30220193 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1681,7 +1681,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	};
 	struct intel_gt *gt = &i915->gt;
 	intel_wakeref_t wakeref;
-	bool saved_hangcheck;
 	int err;
 
 	if (!intel_has_gpu_reset(gt))
@@ -1691,12 +1690,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		return -EIO; /* we're long past hope of a successful reset */
 
 	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
-	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
-	drain_delayed_work(&gt->hangcheck.work); /* flush param */
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2afc41e43b6e..15c7cc3a23b3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1004,92 +1004,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	return ret;
 }
 
-static void i915_instdone_info(struct drm_i915_private *dev_priv,
-			       struct seq_file *m,
-			       struct intel_instdone *instdone)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	int slice;
-	int subslice;
-
-	seq_printf(m, "\t\tINSTDONE: 0x%08x\n",
-		   instdone->instdone);
-
-	if (INTEL_GEN(dev_priv) <= 3)
-		return;
-
-	seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n",
-		   instdone->slice_common);
-
-	if (INTEL_GEN(dev_priv) <= 6)
-		return;
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
-		seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->sampler[slice][subslice]);
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
-		seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->row[slice][subslice]);
-}
-
-static int i915_hangcheck_info(struct seq_file *m, void *unused)
-{
-	struct drm_i915_private *i915 = node_to_i915(m->private);
-	struct intel_gt *gt = &i915->gt;
-	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
-	enum intel_engine_id id;
-
-	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
-	if (test_bit(I915_WEDGED, &gt->reset.flags))
-		seq_puts(m, "\tWedged\n");
-	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
-		seq_puts(m, "\tDevice (global) reset in progress\n");
-
-	if (!i915_modparams.enable_hangcheck) {
-		seq_puts(m, "Hangcheck disabled\n");
-		return 0;
-	}
-
-	if (timer_pending(&gt->hangcheck.work.timer))
-		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
-			   jiffies_to_msecs(gt->hangcheck.work.timer.expires -
-					    jiffies));
-	else if (delayed_work_pending(&gt->hangcheck.work))
-		seq_puts(m, "Hangcheck active, work pending\n");
-	else
-		seq_puts(m, "Hangcheck inactive\n");
-
-	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		for_each_engine(engine, i915, id) {
-			struct intel_instdone instdone;
-
-			seq_printf(m, "%s: %d ms ago\n",
-				   engine->name,
-				   jiffies_to_msecs(jiffies -
-						    engine->hangcheck.action_timestamp));
-
-			seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
-				   (long long)engine->hangcheck.acthd,
-				   intel_engine_get_active_head(engine));
-
-			intel_engine_get_instdone(engine, &instdone);
-
-			seq_puts(m, "\tinstdone read =\n");
-			i915_instdone_info(i915, m, &instdone);
-
-			seq_puts(m, "\tinstdone accu =\n");
-			i915_instdone_info(i915, m,
-					   &engine->hangcheck.instdone);
-		}
-	}
-
-	return 0;
-}
-
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -4305,7 +4219,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
 	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
-	{"i915_hangcheck_info", i915_hangcheck_info, 0},
 	{"i915_drpc_info", i915_drpc_info, 0},
 	{"i915_ring_freq_table", i915_ring_freq_table, 0},
 	{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5323e4fa55d9..949f09bca8ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1598,10 +1598,7 @@ void i915_driver_remove(struct drm_i915_private *i915)
 
 	i915_driver_modeset_remove(i915);
 
-	/* Free error state after interrupts are fully disabled. */
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 	i915_reset_error_state(i915);
-
 	i915_gem_driver_remove(i915);
 
 	intel_power_domains_driver_remove(i915);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cb63b2bd0ce8..79ddb44b834d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2205,7 +2205,6 @@ extern const struct dev_pm_ops i915_pm_ops;
 int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 void i915_driver_remove(struct drm_i915_private *i915);
 
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a28ee754b7b4..a86d28bda6dd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -534,10 +534,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	}
 	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
-	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
-		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
-		   ee->hangcheck_timestamp,
-		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
@@ -679,11 +675,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
-	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
-	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
-		   error->capture,
-		   jiffies_to_msecs(jiffies - error->capture),
-		   jiffies_to_msecs(error->capture - error->epoch));
+	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
+		   error->capture, jiffies_to_msecs(jiffies - error->capture));
 
 	for (ee = error->engine; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
@@ -742,7 +735,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache);
 
 	for (ee = error->engine; ee; ee = ee->next)
-		error_print_engine(m, ee, error->epoch);
+		error_print_engine(m, ee, error->capture);
 
 	for (ee = error->engine; ee; ee = ee->next) {
 		const struct drm_i915_error_object *obj;
@@ -770,7 +763,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			for (j = 0; j < ee->num_requests; j++)
 				error_print_request(m, " ",
 						    &ee->requests[j],
-						    error->epoch);
+						    error->capture);
 		}
 
 		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
@@ -1144,8 +1137,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	}
 
 	ee->idle = intel_engine_is_idle(engine);
-	if (!ee->idle)
-		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
 	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
 						  engine);
 
@@ -1658,20 +1649,6 @@ static void capture_params(struct i915_gpu_state *error)
 	i915_params_copy(&error->params, &i915_modparams);
 }
 
-static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
-{
-	const struct drm_i915_error_engine *ee;
-	unsigned long epoch = error->capture;
-
-	for (ee = error->engine; ee; ee = ee->next) {
-		if (ee->hangcheck_timestamp &&
-		    time_before(ee->hangcheck_timestamp, epoch))
-			epoch = ee->hangcheck_timestamp;
-	}
-
-	return epoch;
-}
-
 static void capture_finish(struct i915_gpu_state *error)
 {
 	struct i915_ggtt *ggtt = &error->i915->ggtt;
@@ -1723,8 +1700,6 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
 	error->overlay = intel_overlay_capture_error_state(i915);
 	error->display = intel_display_capture_error_state(i915);
 
-	error->epoch = capture_find_epoch(error);
-
 	capture_finish(error);
 	compress_fini(&compress);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 63cf387411e0..caaed5093d95 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -34,7 +34,6 @@ struct i915_gpu_state {
 	ktime_t boottime;
 	ktime_t uptime;
 	unsigned long capture;
-	unsigned long epoch;
 
 	struct drm_i915_private *i915;
 
@@ -86,7 +85,6 @@ struct i915_gpu_state {
 
 		/* Software tracked state */
 		bool idle;
-		unsigned long hangcheck_timestamp;
 		int num_requests;
 		u32 reset_count;
 
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index ae8bb3cb627e..732aad148881 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -16,6 +16,12 @@ enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+	/* A preemptive pulse used to monitor the health of each engine */
+	I915_PRIORITY_HEARTBEAT,
+
+	/* Interactive workload, scheduled for immediate pageflipping */
+	I915_PRIORITY_DISPLAY,
 };
 
 #define I915_USER_PRIORITY_SHIFT 2
-- 
2.23.0


* [PATCH 22/30] drm/i915: Remove logical HW ID
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (19 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 16:07   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 23/30] drm/i915: Move context management under GEM Chris Wilson
                   ` (13 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

With the introduction of ctx->engines[] we allow multiple logical
contexts to be used on the same engine (e.g. with virtual engines).
According to bspec, each logical context requires a unique tag in order
for context-switching to occur correctly between them. [Simple
experiments show that it is not so easy to trick the HW into performing
a lite-restore with matching logical IDs, though my memory from early
Broadwell experiments does suggest that it should be generating
lite-restores.]

We only need to keep a unique tag for the active lifetime of the
context, and for as long as we need to identify that context. The HW
uses the tag to determine if it should use a lite-restore (why not the
LRCA?) and passes the tag back for various status identifiers. The only
status we need to track is for OA, so when using perf, we assign the
specific context a unique tag.

v2: Calculate required number of tags to fill ELSP.
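
The scheme reduces to the sketch below, simplified from the
__execlists_schedule_in() hunk in this patch; the helper name is
hypothetical, while the field layout and NUM_CONTEXT_TAG come from the
diff (GENMASK_ULL is the usual <linux/bits.h> helper):

/*
 * Hypothetical helper mirroring the tag assignment added below: the SW
 * context ID (bits 47:37 of the descriptor) only needs to differ
 * between the contexts concurrently in ELSP, so a small per-engine
 * rolling counter suffices; perf/OA instead pins a fixed non-zero tag
 * so its reports can always be matched to the stream's context.
 */
static u64 assign_sw_ctx_id(u64 desc, unsigned int *context_tag, u32 fixed_tag)
{
	desc &= ~GENMASK_ULL(47, 37);		/* clear SW context ID */
	if (fixed_tag)				/* e.g. an OA stream */
		desc |= (u64)fixed_tag << 32;
	else					/* rolling tag pool */
		desc |= (u64)((*context_tag)++ % NUM_CONTEXT_TAG) << 37;
	return desc;
}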

Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 147 ------------------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
 .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |   8 -
 drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  32 ++--
 drivers/gpu/drm/i915/gvt/kvmgt.c              |  17 --
 drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |   7 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
 drivers/gpu/drm/i915/i915_perf.c              |  25 ++-
 drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
 16 files changed, 51 insertions(+), 284 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e832238a72e5..4e59b809d901 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -168,97 +168,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
 	return i915_gem_context_get_engine(ctx, idx);
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
-{
-	unsigned int max;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	if (INTEL_GEN(i915) >= 12)
-		max = GEN12_MAX_CONTEXT_HW_ID;
-	else if (INTEL_GEN(i915) >= 11)
-		max = GEN11_MAX_CONTEXT_HW_ID;
-	else if (USES_GUC_SUBMISSION(i915))
-		/*
-		 * When using GuC in proxy submission, GuC consumes the
-		 * highest bit in the context id to indicate proxy submission.
-		 */
-		max = MAX_GUC_CONTEXT_HW_ID;
-	else
-		max = MAX_CONTEXT_HW_ID;
-
-	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-	struct i915_gem_context *ctx, *cn;
-	LIST_HEAD(pinned);
-	int id = -ENOSPC;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	list_for_each_entry_safe(ctx, cn,
-				 &i915->contexts.hw_id_list, hw_id_link) {
-		if (atomic_read(&ctx->hw_id_pin_count)) {
-			list_move_tail(&ctx->hw_id_link, &pinned);
-			continue;
-		}
-
-		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-		list_del_init(&ctx->hw_id_link);
-		id = ctx->hw_id;
-		break;
-	}
-
-	/*
-	 * Remember how far we got up on the last repossesion scan, so the
-	 * list is kept in a "least recently scanned" order.
-	 */
-	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-	return id;
-}
-
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
-{
-	int ret;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	/*
-	 * We prefer to steal/stall ourselves and our users over that of the
-	 * entire system. That may be a little unfair to our users, and
-	 * even hurt high priority clients. The choice is whether to oomkill
-	 * something else, or steal a context id.
-	 */
-	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	if (unlikely(ret < 0)) {
-		ret = steal_hw_id(i915);
-		if (ret < 0) /* once again for the correct errno code */
-			ret = new_hw_id(i915, GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
-
-	*out = ret;
-	return 0;
-}
-
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-
-	if (list_empty(&ctx->hw_id_link))
-		return;
-
-	mutex_lock(&i915->contexts.mutex);
-	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-		list_del_init(&ctx->hw_id_link);
-	}
-	mutex_unlock(&i915->contexts.mutex);
-}
-
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
 {
 	while (count--) {
@@ -313,8 +222,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	release_hw_id(ctx);
-
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
@@ -459,12 +366,6 @@ static void context_close(struct i915_gem_context *ctx)
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 
-	/*
-	 * This context will never again be assinged to HW, so we can
-	 * reuse its ID for the next context.
-	 */
-	release_hw_id(ctx);
-
 	/*
 	 * The LUT uses the VMA as a backpointer to unref the object,
 	 * so we need to clear the LUT before we close all the VMA (inside
@@ -507,7 +408,6 @@ __create_context(struct drm_i915_private *i915)
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
@@ -676,18 +576,11 @@ struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;
-	int err;
 
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
 	i915_gem_context_clear_bannable(ctx);
 	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
@@ -727,15 +620,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	/*
-	 * For easy recognisablity, we want the kernel context to be 0 and then
-	 * all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
-	 */
-	GEM_BUG_ON(ctx->hw_id);
-	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
 	dev_priv->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
@@ -749,10 +633,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	destroy_kernel_context(&i915->kernel_context);
-
-	/* Must free all deferred contexts (via flush_workqueue) first */
-	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
-	ida_destroy(&i915->contexts.hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
@@ -2438,33 +2318,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int err = 0;
-
-	mutex_lock(&i915->contexts.mutex);
-
-	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
-	if (list_empty(&ctx->hw_id_link)) {
-		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
-		err = assign_hw_id(i915, &ctx->hw_id);
-		if (err)
-			goto out_unlock;
-
-		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
-	}
-
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
-	atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
-	mutex_unlock(&i915->contexts.mutex);
-	return err;
-}
-
 /* GEM context-engines iterator: for_each_gem_engine() */
 struct intel_context *
 i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index e0f5b6c6a331..1b0df53436cf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -127,21 +127,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
 	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
-		return 0;
-
-	return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
-	atomic_dec(&ctx->hw_id_pin_count);
-}
-
 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 {
 	return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index daf1ea5075a6..6419da7c9f90 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -148,24 +148,6 @@ struct i915_gem_context {
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 #define CONTEXT_USER_ENGINES		3
 
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
 	struct mutex mutex;
 
 	struct i915_sched_attr sched;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 2288757808ae..2fb31ada2fa7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg)
 
 			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->vm), err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
@@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg)
 
 			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->vm), err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
@@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg)
 
 			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       ce->engine->name, ctx->hw_id,
-				       yesno(!!ctx->vm), err);
+				       ce->engine->name, yesno(!!ctx->vm), err);
 				i915_gem_context_unlock_engines(ctx);
 				goto out_unlock;
 			}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 0d1a275ec316..ebc46f098561 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
 {
 	struct i915_gem_context *ctx;
 	struct i915_gem_engines *e;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -32,13 +31,8 @@ mock_context(struct drm_i915_private *i915,
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 	mutex_init(&ctx->mutex);
 
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_engines;
-
 	if (name) {
 		struct i915_ppgtt *ppgtt;
 
@@ -56,8 +50,6 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_engines:
-	free_engines(rcu_access_pointer(ctx->engines));
 err_free:
 	kfree(ctx);
 	return NULL;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..6959b05ae5f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+	u32 tag; /* cookie passed to HW to track this context on submission */
 
 	unsigned int active_count; /* protected by timeline->mutex */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 25af66540a1b..ad3be2fbd71a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -301,10 +301,12 @@ struct intel_engine_cs {
 	u8 uabi_class;
 	u8 uabi_instance;
 
+	u32 uabi_capabilities;
 	u32 context_size;
 	u32 mmio_base;
 
-	u32 uabi_capabilities;
+	unsigned int context_tag;
+#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
 
 	struct rb_node uabi_node;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6dcceaf02e00..890cd9ab07a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -443,12 +443,8 @@ assert_priority_queue(const struct i915_request *prev,
 static u64
 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
 	desc = INTEL_LEGACY_32B_CONTEXT;
 	if (i915_vm_is_4lvl(ce->vm))
 		desc = INTEL_LEGACY_64B_CONTEXT;
@@ -466,20 +462,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	 * anything below.
 	 */
 	if (INTEL_GEN(engine->i915) >= 11) {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-								/* bits 37-47 */
-
 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 								/* bits 48-53 */
 
-		/* TODO: decide what to do with SW counter (bits 55-60) */
-
 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 								/* bits 61-63 */
-	} else {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
 	return desc;
@@ -988,6 +975,18 @@ __execlists_schedule_in(struct i915_request *rq)
 
 	intel_context_get(ce);
 
+	if (ce->tag) {
+		/* Use a fixed tag for OA and friends */
+		ce->lrc_desc |= (u64)ce->tag << 32;
+	} else {
+		/* We don't need a strict matching tag, just different values */
+		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+		ce->lrc_desc |=
+			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+			GEN11_SW_CTX_ID_SHIFT;
+		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+	}
+
 	intel_gt_pm_get(engine->gt);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
@@ -2235,7 +2234,6 @@ static void execlists_context_unpin(struct intel_context *ce)
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
 		      ce->engine);
 
-	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
 	intel_ring_reset(ce->ring, ce->ring->tail);
 }
@@ -2285,18 +2283,12 @@ __execlists_context_pin(struct intel_context *ce,
 		goto unpin_active;
 	}
 
-	ret = i915_gem_context_pin_hw_id(ce->gem_context);
-	if (ret)
-		goto unpin_map;
-
 	ce->lrc_desc = lrc_descriptor(ce, engine);
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
 
-unpin_map:
-	i915_gem_object_unpin_map(ce->state->obj);
 unpin_active:
 	intel_context_active_release(ce);
 err:
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 343d79c1cb7e..04a5a0d90823 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "\n");
 }
 
-static ssize_t
-hw_id_show(struct device *dev, struct device_attribute *attr,
-	   char *buf)
-{
-	struct mdev_device *mdev = mdev_from_dev(dev);
-
-	if (mdev) {
-		struct intel_vgpu *vgpu = (struct intel_vgpu *)
-			mdev_get_drvdata(mdev);
-		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow[0]->gem_context->hw_id);
-	}
-	return sprintf(buf, "\n");
-}
-
 static DEVICE_ATTR_RO(vgpu_id);
-static DEVICE_ATTR_RO(hw_id);
 
 static struct attribute *intel_vgpu_attrs[] = {
 	&dev_attr_vgpu_id.attr,
-	&dev_attr_hw_id.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 15c7cc3a23b3..454de27c4912 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1507,9 +1507,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
-		if (!list_empty(&ctx->hw_id_link))
-			seq_printf(m, "%x [pin %u]", ctx->hw_id,
-				   atomic_read(&ctx->hw_id_pin_count));
 		if (ctx->pid) {
 			struct task_struct *task;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a86d28bda6dd..47239df653f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
-		   header, ctx->comm, ctx->pid, ctx->hw_id,
-		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
+	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
+		   ctx->guilty, ctx->active);
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -1262,7 +1262,6 @@ static bool record_context(struct drm_i915_error_context *e,
 		rcu_read_unlock();
 	}
 
-	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index caaed5093d95..4dc36d6ee3a2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -117,7 +117,6 @@ struct i915_gpu_state {
 		struct drm_i915_error_context {
 			char comm[TASK_COMM_LEN];
 			pid_t pid;
-			u32 hw_id;
 			int active;
 			int guilty;
 			struct i915_sched_attr sched_attr;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 80055501eccb..ecfbc37b738b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		} else {
 			stream->specific_ctx_id_mask =
 				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			stream->specific_ctx_id &=
-				stream->specific_ctx_id_mask;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		}
 		break;
 
 	case 11:
 	case 12: {
 		stream->specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		stream->specific_ctx_id &=
-			stream->specific_ctx_id_mask;
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		break;
 	}
 
@@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		MISSING_CASE(INTEL_GEN(i915));
 	}
 
+	ce->tag = stream->specific_ctx_id_mask;
+
 	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
 			 stream->specific_ctx_id,
 			 stream->specific_ctx_id_mask);
@@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct intel_context *ce;
 
-	stream->specific_ctx_id = INVALID_CTX_ID;
-	stream->specific_ctx_id_mask = 0;
-
 	ce = fetch_and_zero(&stream->pinned_ctx);
-	if (ce)
+	if (ce) {
+		ce->tag = 0; /* recomputed on next submission after parking */
 		intel_context_unpin(ce);
+	}
+
+	stream->specific_ctx_id = INVALID_CTX_ID;
+	stream->specific_ctx_id_mask = 0;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 24f2944da09d..1f2cf6cfafb5 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->flags)
+		      __entry->ctx, __entry->seqno, __entry->flags)
 );
 
 DECLARE_EVENT_CLASS(i915_request,
@@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno)
+		      __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->prio, __entry->port)
 );
 
@@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
-			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->completed)
+			      __entry->ctx, __entry->seqno, __entry->completed)
 );
 
 #else
@@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->flags)
 );
 
@@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_STRUCT__entry(
 			__field(u32, dev)
 			__field(struct i915_gem_context *, ctx)
-			__field(u32, hw_id)
 			__field(struct i915_address_space *, vm)
 	),
 
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->hw_id = ctx->hw_id;
 			__entry->vm = ctx->vm;
 	),
 
-	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
-		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
+	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
+		  __entry->dev, __entry->ctx, __entry->vm)
 )
 
 DEFINE_EVENT(i915_context, i915_context_create,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 52d2df843148..f39f0282e78c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg)
 			if (IS_ERR(rq)) {
 				/* When full, fail_if_busy will trigger EBUSY */
 				if (PTR_ERR(rq) != -EBUSY) {
-					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
-					       ctx->hw_id, engine->name,
+					pr_err("Unexpected error from request alloc (on %s): %d\n",
+					       engine->name,
 					       (int)PTR_ERR(rq));
 					err = PTR_ERR(rq);
 				}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 1c9db08f7c28..ac1ff558eb90 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
+		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
-- 
2.23.0


* [PATCH 23/30] drm/i915: Move context management under GEM
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (20 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard Chris Wilson
                   ` (12 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Keep track of the GEM contexts underneath i915->gem.contexts and give
them their own spinlock for list management, so that creating and
closing contexts no longer has to be serialised by struct_mutex.
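
A condensed sketch of the scheme (struct and locking as in the diff
below; publish_context is an illustrative name, not a helper this
patch adds):

	struct i915_gem_contexts {
		spinlock_t lock; /* locks the list of user contexts */
		struct list_head list;

		struct llist_head free_list; /* closed ctx awaiting free */
		struct work_struct free_work;
	};

	static void publish_context(struct drm_i915_private *i915,
				    struct i915_gem_context *ctx)
	{
		spin_lock(&i915->gem.contexts.lock);
		list_add_tail(&ctx->link, &i915->gem.contexts.list);
		spin_unlock(&i915->gem.contexts.lock);
	}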

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 155 +++++++-----------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   2 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  12 +-
 .../drm/i915/gem/selftests/i915_gem_context.c | 143 +++++++---------
 .../gpu/drm/i915/gem/selftests/mock_context.c |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   8 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   5 +
 drivers/gpu/drm/i915/gt/selftest_context.c    |  24 +--
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  19 +--
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |   4 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  10 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |  20 +--
 drivers/gpu/drm/i915/i915_debugfs.c           |  50 +++---
 drivers/gpu/drm/i915/i915_drv.c               |   2 -
 drivers/gpu/drm/i915/i915_drv.h               |  27 +--
 drivers/gpu/drm/i915/i915_gem.c               |  10 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   4 +-
 drivers/gpu/drm/i915/i915_perf.c              |  24 ++-
 drivers/gpu/drm/i915/i915_sysfs.c             |  77 ++++-----
 drivers/gpu/drm/i915/i915_trace.h             |   2 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     |   8 -
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   3 -
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  11 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   5 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   6 +-
 29 files changed, 286 insertions(+), 363 deletions(-)
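
Several hunks below (i915_debugfs.c, i915_perf.c) walk the context
list with the same idiom: pin the context with a reference under the
new spinlock, drop the lock for the loop body, then revalidate the
cursor before continuing. Condensed for review (illustrative, not a
literal hunk):

	struct i915_gem_context *ctx, *cn;

	spin_lock(&i915->gem.contexts.lock);
	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
		if (!kref_get_unless_zero(&ctx->ref))
			continue; /* already being freed, skip it */

		spin_unlock(&i915->gem.contexts.lock);

		/* ... use ctx without holding the list lock ... */

		spin_lock(&i915->gem.contexts.lock);
		list_safe_reset_next(ctx, cn, link); /* next may be stale */
		i915_gem_context_put(ctx);
	}
	spin_unlock(&i915->gem.contexts.lock);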

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4e59b809d901..a77f439358d7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -219,9 +219,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
+	spin_lock(&ctx->i915->gem.contexts.lock);
+	list_del(&ctx->link);
+	spin_unlock(&ctx->i915->gem.contexts.lock);
+
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
@@ -231,56 +234,40 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
-	list_del(&ctx->link);
 	mutex_destroy(&ctx->mutex);
 
 	kfree_rcu(ctx, rcu);
 }
 
-static void contexts_free(struct drm_i915_private *i915)
+static void contexts_free_all(struct llist_node *list)
 {
-	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
 	struct i915_gem_context *ctx, *cn;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	llist_for_each_entry_safe(ctx, cn, freed, free_link)
+	llist_for_each_entry_safe(ctx, cn, list, free_link)
 		i915_gem_context_free(ctx);
 }
 
-static void contexts_free_first(struct drm_i915_private *i915)
+static void contexts_flush_free(struct i915_gem_contexts *gc)
 {
-	struct i915_gem_context *ctx;
-	struct llist_node *freed;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	freed = llist_del_first(&i915->contexts.free_list);
-	if (!freed)
-		return;
-
-	ctx = container_of(freed, typeof(*ctx), free_link);
-	i915_gem_context_free(ctx);
+	contexts_free_all(llist_del_all(&gc->free_list));
 }
 
 static void contexts_free_worker(struct work_struct *work)
 {
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), contexts.free_work);
+	struct i915_gem_contexts *gc =
+		container_of(work, typeof(*gc), free_work);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	contexts_free(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	contexts_flush_free(gc);
 }
 
 void i915_gem_context_release(struct kref *ref)
 {
 	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
-	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_gem_contexts *gc = &ctx->i915->gem.contexts;
 
 	trace_i915_context_free(ctx);
-	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
-		queue_work(i915->wq, &i915->contexts.free_work);
+	if (llist_add(&ctx->free_link, &gc->free_list))
+		queue_work(ctx->i915->wq, &gc->free_work);
 }
 
 static inline struct i915_gem_engines *
@@ -359,8 +346,8 @@ static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
 
-	if (ctx->vm)
-		i915_vm_close(ctx->vm);
+	if (rcu_access_pointer(ctx->vm))
+		i915_vm_close(rcu_dereference_protected(ctx->vm, true));
 
 	mutex_lock(&ctx->mutex);
 
@@ -394,7 +381,6 @@ __create_context(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ctx->ref);
-	list_add_tail(&ctx->link, &i915->contexts.list);
 	ctx->i915 = i915;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
@@ -435,6 +421,10 @@ __create_context(struct drm_i915_private *i915)
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
+	spin_lock(&i915->gem.contexts.lock);
+	list_add_tail(&ctx->link, &i915->gem.contexts.list);
+	spin_unlock(&i915->gem.contexts.lock);
+
 	return ctx;
 
 err_free:
@@ -464,11 +454,11 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm)
 static struct i915_address_space *
 __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 {
-	struct i915_address_space *old = ctx->vm;
+	struct i915_address_space *old = rcu_dereference_protected(ctx->vm, 1);
 
 	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
 
-	ctx->vm = i915_vm_open(vm);
+	rcu_assign_pointer(ctx->vm, i915_vm_open(vm));
 	context_apply_all(ctx, __apply_ppgtt, vm);
 
 	return old;
@@ -477,7 +467,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 static void __assign_ppgtt(struct i915_gem_context *ctx,
 			   struct i915_address_space *vm)
 {
-	if (vm == ctx->vm)
+	if (vm == rcu_access_pointer(ctx->vm))
 		return;
 
 	vm = __set_ppgtt(ctx, vm);
@@ -509,27 +499,25 @@ static void __assign_timeline(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
-	    !HAS_EXECLISTS(dev_priv))
+	    !HAS_EXECLISTS(i915))
 		return ERR_PTR(-EINVAL);
 
-	/* Reap the most stale context */
-	contexts_free_first(dev_priv);
+	/* Reap the stale contexts */
+	contexts_flush_free(&i915->gem.contexts);
 
-	ctx = __create_context(dev_priv);
+	ctx = __create_context(i915);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (HAS_FULL_PPGTT(dev_priv)) {
+	if (HAS_FULL_PPGTT(i915)) {
 		struct i915_ppgtt *ppgtt;
 
-		ppgtt = i915_ppgtt_create(dev_priv);
+		ppgtt = i915_ppgtt_create(i915);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
@@ -544,7 +532,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
 		struct intel_timeline *timeline;
 
-		timeline = intel_timeline_create(&dev_priv->gt, NULL);
+		timeline = intel_timeline_create(&i915->gt, NULL);
 		if (IS_ERR(timeline)) {
 			context_close(ctx);
 			return ERR_CAST(timeline);
@@ -590,48 +578,40 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	return ctx;
 }
 
-static void init_contexts(struct drm_i915_private *i915)
+static void init_contexts(struct i915_gem_contexts *gc)
 {
-	mutex_init(&i915->contexts.mutex);
-	INIT_LIST_HEAD(&i915->contexts.list);
-
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
+	spin_lock_init(&gc->lock);
+	INIT_LIST_HEAD(&gc->list);
 
-	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
-	init_llist_head(&i915->contexts.free_list);
+	INIT_WORK(&gc->free_work, contexts_free_worker);
+	init_llist_head(&gc->free_list);
 }
 
-int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
+int i915_gem_init_contexts(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx;
 
 	/* Reassure ourselves we are only called once */
-	GEM_BUG_ON(dev_priv->kernel_context);
+	GEM_BUG_ON(i915->kernel_context);
 
-	init_contexts(dev_priv);
+	init_contexts(&i915->gem.contexts);
 
 	/* lowest priority; idle task */
-	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
+	ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	dev_priv->kernel_context = ctx;
+	i915->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			 DRIVER_CAPS(dev_priv)->has_logical_contexts ?
+			 DRIVER_CAPS(i915)->has_logical_contexts ?
 			 "logical" : "fake");
 	return 0;
 }
 
-void i915_gem_contexts_fini(struct drm_i915_private *i915)
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	destroy_kernel_context(&i915->kernel_context);
 }
 
@@ -653,8 +633,8 @@ static int gem_context_register(struct i915_gem_context *ctx,
 	int ret;
 
 	ctx->file_priv = fpriv;
-	if (ctx->vm)
-		ctx->vm->file = fpriv;
+	if (rcu_access_pointer(ctx->vm))
+		rcu_dereference_protected(ctx->vm, true)->file = fpriv;
 
 	ctx->pid = get_task_pid(current, PIDTYPE_PID);
 	ctx->name = kasprintf(GFP_KERNEL, "%s[%d]",
@@ -691,9 +671,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init(&file_priv->context_idr);
 	idr_init_base(&file_priv->vm_idr, 1);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err;
@@ -721,6 +699,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private *i915 = file_priv->dev_priv;
 
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
@@ -729,6 +708,8 @@ void i915_gem_context_close(struct drm_file *file)
 	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
 	idr_destroy(&file_priv->vm_idr);
 	mutex_destroy(&file_priv->vm_idr_lock);
+
+	contexts_flush_free(&i915->gem.contexts);
 }
 
 int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -907,16 +888,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_address_space *vm;
 	int ret;
 
-	if (!ctx->vm)
+	if (!rcu_access_pointer(ctx->vm))
 		return -ENODEV;
 
-	/* XXX rcu acquire? */
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
+	rcu_read_lock();
 	vm = i915_vm_get(ctx->vm);
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	rcu_read_unlock();
 
 	ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
 	if (ret)
@@ -1025,7 +1002,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	if (args->size)
 		return -EINVAL;
 
-	if (!ctx->vm)
+	if (!rcu_access_pointer(ctx->vm))
 		return -ENODEV;
 
 	if (upper_32_bits(args->value))
@@ -1039,17 +1016,15 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	if (!vm)
 		return -ENOENT;
 
-	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	err = mutex_lock_interruptible(&ctx->mutex);
 	if (err)
 		goto out;
 
-	if (vm == ctx->vm)
+	if (vm == rcu_access_pointer(ctx->vm))
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
-	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
-	mutex_unlock(&ctx->mutex);
 
 	old = __set_ppgtt(ctx, vm);
 
@@ -1069,8 +1044,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	}
 
 unlock:
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
-
+	mutex_unlock(&ctx->mutex);
 out:
 	i915_vm_put(vm);
 	return err;
@@ -1953,7 +1927,7 @@ static int clone_vm(struct i915_gem_context *dst,
 
 	rcu_read_lock();
 	do {
-		vm = READ_ONCE(src->vm);
+		vm = rcu_dereference(src->vm);
 		if (!vm)
 			break;
 
@@ -1975,7 +1949,7 @@ static int clone_vm(struct i915_gem_context *dst,
 		 * it cannot be reallocated elsewhere.
 		 */
 
-		if (vm == READ_ONCE(src->vm))
+		if (vm == rcu_access_pointer(src->vm))
 			break;
 
 		i915_vm_put(vm);
@@ -2077,12 +2051,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	ext_data.ctx = i915_gem_create_context(i915, args->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ext_data.ctx))
 		return PTR_ERR(ext_data.ctx);
 
@@ -2209,10 +2178,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 
 	case I915_CONTEXT_PARAM_GTT_SIZE:
 		args->size = 0;
-		if (ctx->vm)
-			args->value = ctx->vm->total;
+		rcu_read_lock();
+		if (rcu_access_pointer(ctx->vm))
+			args->value = rcu_dereference(ctx->vm)->total;
 		else
 			args->value = to_i915(dev)->ggtt.vm.total;
+		rcu_read_unlock();
 		break;
 
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
@@ -2283,7 +2254,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 				       void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_reset_stats *args = data;
 	struct i915_gem_context *ctx;
 	int ret;
@@ -2305,7 +2276,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+		args->reset_count = i915_reset_count(&i915->gpu_error);
 	else
 		args->reset_count = 0;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 1b0df53436cf..4ee5dfc5794e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -133,8 +133,8 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 }
 
 /* i915_gem_context.c */
-int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
+int __must_check i915_gem_init_contexts(struct drm_i915_private *i915);
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 6419da7c9f90..a3ecd19f2303 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -88,7 +88,7 @@ struct i915_gem_context {
 	 * In other modes, this is a NULL pointer with the expectation that
 	 * the caller uses the shared global GTT.
 	 */
-	struct i915_address_space *vm;
+	struct i915_address_space __rcu *vm;
 
 	/**
 	 * @pid: process id of creator
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 88a881be12ec..98816c35ffc3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -728,7 +728,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 		return -ENOENT;
 
 	eb->gem_context = ctx;
-	if (ctx->vm)
+	if (rcu_access_pointer(ctx->vm))
 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
 	eb->context_flags = 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 1738a15eb911..6d30955cb020 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -758,7 +758,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		 * On almost all of the older hw, we cannot tell the GPU that
 		 * a page is readonly.
 		 */
-		vm = dev_priv->kernel_context->vm;
+		vm = rcu_dereference_protected(dev_priv->kernel_context->vm, true);
 		if (!vm || !vm->has_read_only)
 			return -ENODEV;
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 98b2a6ccfcc1..e204e653b459 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1322,7 +1322,7 @@ static int igt_ppgtt_pin_update(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *dev_priv = ctx->i915;
 	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
-	struct i915_address_space *vm = ctx->vm;
+	struct i915_address_space *vm = rcu_dereference_protected(ctx->vm, 1);
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1460,7 +1460,8 @@ static int igt_tmpfs_fallback(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
 	struct vfsmount *gemfs = i915->mm.gemfs;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(ctx->vm, true) ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	u32 *vaddr;
@@ -1517,7 +1518,8 @@ static int igt_shrink_thp(void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(ctx->vm, true) ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1699,8 +1701,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 		goto out_unlock;
 	}
 
-	if (ctx->vm)
-		ctx->vm->scrub_64K = true;
+	if (rcu_access_pointer(ctx->vm))
+		rcu_dereference_protected(ctx->vm, true)->scrub_64K = true;
 
 	err = i915_subtests(tests, ctx);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 2fb31ada2fa7..ffedc67f5c3d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -53,19 +53,17 @@ static int live_nop_switch(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
 		err = -ENOMEM;
-		goto out_unlock;
+		goto out_file;
 	}
 
 	for (n = 0; n < nctx; n++) {
 		ctx[n] = live_context(i915, file);
 		if (IS_ERR(ctx[n])) {
 			err = PTR_ERR(ctx[n]);
-			goto out_unlock;
+			goto out_file;
 		}
 	}
 
@@ -79,7 +77,7 @@ static int live_nop_switch(void *arg)
 			rq = igt_request_alloc(ctx[n], engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
-				goto out_unlock;
+				goto out_file;
 			}
 			i915_request_add(rq);
 		}
@@ -87,7 +85,7 @@ static int live_nop_switch(void *arg)
 			pr_err("Failed to populate %d contexts\n", nctx);
 			intel_gt_set_wedged(&i915->gt);
 			err = -EIO;
-			goto out_unlock;
+			goto out_file;
 		}
 
 		times[1] = ktime_get_raw();
@@ -97,7 +95,7 @@ static int live_nop_switch(void *arg)
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		end_time = jiffies + i915_selftest.timeout_jiffies;
 		for_each_prime_number_from(prime, 2, 8192) {
@@ -107,7 +105,7 @@ static int live_nop_switch(void *arg)
 				rq = igt_request_alloc(ctx[n % nctx], engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					goto out_unlock;
+					goto out_file;
 				}
 
 				/*
@@ -143,7 +141,7 @@ static int live_nop_switch(void *arg)
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -151,8 +149,7 @@ static int live_nop_switch(void *arg)
 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
+out_file:
 	mock_file_free(i915, file);
 	return err;
 }
@@ -253,12 +250,10 @@ static int live_parallel_switch(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_locked;
+		goto out_file;
 	}
 
 	engines = i915_gem_context_lock_engines(ctx);
@@ -268,7 +263,7 @@ static int live_parallel_switch(void *arg)
 	if (!data) {
 		i915_gem_context_unlock_engines(ctx);
 		err = -ENOMEM;
-		goto out_locked;
+		goto out;
 	}
 
 	m = 0; /* Use the first context as our template for the engines */
@@ -276,7 +271,7 @@ static int live_parallel_switch(void *arg)
 		err = intel_context_pin(ce);
 		if (err) {
 			i915_gem_context_unlock_engines(ctx);
-			goto out_locked;
+			goto out;
 		}
 		data[m++].ce[0] = intel_context_get(ce);
 	}
@@ -287,7 +282,7 @@ static int live_parallel_switch(void *arg)
 		ctx = live_context(i915, file);
 		if (IS_ERR(ctx)) {
 			err = PTR_ERR(ctx);
-			goto out_locked;
+			goto out;
 		}
 
 		for (m = 0; m < count; m++) {
@@ -296,20 +291,18 @@ static int live_parallel_switch(void *arg)
 
 			ce = intel_context_create(ctx, data[m].ce[0]->engine);
 			if (IS_ERR(ce))
-				goto out_locked;
+				goto out;
 
 			err = intel_context_pin(ce);
 			if (err) {
 				intel_context_put(ce);
-				goto out_locked;
+				goto out;
 			}
 
 			data[m].ce[n] = ce;
 		}
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	for (fn = func; !err && *fn; fn++) {
 		struct igt_live_test t;
 		int n;
@@ -354,8 +347,7 @@ static int live_parallel_switch(void *arg)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
-out_locked:
+out:
 	for (n = 0; n < count; n++) {
 		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
 			if (!data[n].ce[m])
@@ -365,8 +357,8 @@ static int live_parallel_switch(void *arg)
 			intel_context_put(data[n].ce[m]);
 		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(data);
+out_file:
 	mock_file_free(i915, file);
 	return err;
 }
@@ -626,11 +618,9 @@ static int igt_ctx_exec(void *arg)
 		if (IS_ERR(file))
 			return PTR_ERR(file);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		ncontexts = 0;
 		ndwords = 0;
@@ -642,7 +632,7 @@ static int igt_ctx_exec(void *arg)
 			ctx = kernel_context(i915);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
@@ -654,7 +644,7 @@ static int igt_ctx_exec(void *arg)
 					err = PTR_ERR(obj);
 					intel_context_put(ce);
 					kernel_context_close(ctx);
-					goto out_unlock;
+					goto out_file;
 				}
 			}
 
@@ -663,17 +653,18 @@ static int igt_ctx_exec(void *arg)
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name,
-				       yesno(!!ctx->vm), err);
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			err = throttle(ce, tq, ARRAY_SIZE(tq));
 			if (err) {
 				intel_context_put(ce);
 				kernel_context_close(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			if (++dw == max_dwords(obj)) {
@@ -703,11 +694,10 @@ static int igt_ctx_exec(void *arg)
 			dw += rem;
 		}
 
-out_unlock:
+out_file:
 		throttle_release(tq, ARRAY_SIZE(tq));
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		mock_file_free(i915, file);
 		if (err)
@@ -742,22 +732,20 @@ static int igt_shared_ctx_exec(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	parent = live_context(i915, file);
 	if (IS_ERR(parent)) {
 		err = PTR_ERR(parent);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
 		err = 0;
-		goto out_unlock;
+		goto out_file;
 	}
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	for_each_engine(engine, i915, id) {
 		unsigned long ncontexts, ndwords, dw;
@@ -801,7 +789,8 @@ static int igt_shared_ctx_exec(void *arg)
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name,
-				       yesno(!!ctx->vm), err);
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
 				goto out_test;
@@ -840,17 +829,13 @@ static int igt_shared_ctx_exec(void *arg)
 			dw += rem;
 		}
 
-		mutex_unlock(&i915->drm.struct_mutex);
 		i915_gem_drain_freed_objects(i915);
-		mutex_lock(&i915->drm.struct_mutex);
 	}
 out_test:
 	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
+out_file:
 	mock_file_free(i915, file);
 	return err;
 }
@@ -1222,8 +1207,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	if (flags & TEST_RESET)
 		igt_global_reset_lock(&i915->gt);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
@@ -1278,8 +1261,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	i915_gem_object_put(obj);
 
 out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	if (flags & TEST_RESET)
 		igt_global_reset_unlock(&i915->gt);
 
@@ -1339,23 +1320,24 @@ static int igt_ctx_readonly(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_unlock;
+		goto out_file;
 	}
 
-	vm = ctx->vm ?: &i915->ggtt.alias->vm;
+	rcu_read_lock();
+	vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm;
 	if (!vm || !vm->has_read_only) {
+		rcu_read_unlock();
 		err = 0;
-		goto out_unlock;
+		goto out_file;
 	}
+	rcu_read_unlock();
 
 	ndwords = 0;
 	dw = 0;
@@ -1373,7 +1355,7 @@ static int igt_ctx_readonly(void *arg)
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
 					i915_gem_context_unlock_engines(ctx);
-					goto out_unlock;
+					goto out_file;
 				}
 
 				if (prandom_u32_state(&prng) & 1)
@@ -1384,15 +1366,17 @@ static int igt_ctx_readonly(void *arg)
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       ce->engine->name, yesno(!!ctx->vm), err);
+				       ce->engine->name,
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
 				i915_gem_context_unlock_engines(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			err = throttle(ce, tq, ARRAY_SIZE(tq));
 			if (err) {
 				i915_gem_context_unlock_engines(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			if (++dw == max_dwords(obj)) {
@@ -1424,11 +1408,10 @@ static int igt_ctx_readonly(void *arg)
 		dw += rem;
 	}
 
-out_unlock:
+out_file:
 	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	mock_file_free(i915, file);
 	return err;
@@ -1437,7 +1420,7 @@ static int igt_ctx_readonly(void *arg)
 static int check_scratch(struct i915_gem_context *ctx, u64 offset)
 {
 	struct drm_mm_node *node =
-		__drm_mm_interval_first(&ctx->vm->mm,
+		__drm_mm_interval_first(&rcu_dereference_protected(ctx->vm, true)->mm,
 					offset, offset + sizeof(u32) - 1);
 	if (!node || node->start > offset)
 		return 0;
@@ -1487,7 +1470,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 
 	intel_gt_chipset_flush(engine->gt);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj,
+				rcu_dereference_protected(ctx->vm, true),
+				NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -1586,7 +1571,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
 	intel_gt_chipset_flush(engine->gt);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj,
+				rcu_dereference_protected(ctx->vm, true),
+				NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -1677,27 +1664,25 @@ static int igt_vm_isolation(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	ctx_a = live_context(i915, file);
 	if (IS_ERR(ctx_a)) {
 		err = PTR_ERR(ctx_a);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	ctx_b = live_context(i915, file);
 	if (IS_ERR(ctx_b)) {
 		err = PTR_ERR(ctx_b);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	/* We can only test vm isolation, if the vm are distinct */
 	if (ctx_a->vm == ctx_b->vm)
-		goto out_unlock;
+		goto out_file;
 
 	vm_total = ctx_a->vm->total;
 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
@@ -1726,7 +1711,7 @@ static int igt_vm_isolation(void *arg)
 				err = read_from_scratch(ctx_b, engine,
 							offset, &value);
 			if (err)
-				goto out_unlock;
+				goto out_file;
 
 			if (value) {
 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1735,7 +1720,7 @@ static int igt_vm_isolation(void *arg)
 				       lower_32_bits(offset),
 				       this);
 				err = -EINVAL;
-				goto out_unlock;
+				goto out_file;
 			}
 
 			this++;
@@ -1745,11 +1730,9 @@ static int igt_vm_isolation(void *arg)
 	pr_info("Checked %lu scratch offsets across %d engines\n",
 		count, RUNTIME_INFO(i915)->num_engines);
 
-out_unlock:
+out_file:
 	if (igt_live_test_end(&t))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	mock_file_free(i915, file);
 	return err;
 }
@@ -1781,13 +1764,9 @@ static int mock_context_barrier(void *arg)
 	 * a request; useful for retiring old state after loading new.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = mock_context(i915, "mock");
-	if (!ctx) {
-		err = -ENOMEM;
-		goto unlock;
-	}
+	if (!ctx)
+		return -ENOMEM;
 
 	counter = 0;
 	err = context_barrier_task(ctx, 0,
@@ -1860,8 +1839,6 @@ static int mock_context_barrier(void *arg)
 
 out:
 	mock_context_close(ctx);
-unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 #undef pr_fmt
 #define pr_fmt(x) x
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index ebc46f098561..6ef86e7923a7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -67,7 +67,7 @@ void mock_context_close(struct i915_gem_context *ctx)
 
 void mock_init_contexts(struct drm_i915_private *i915)
 {
-	init_contexts(i915);
+	init_contexts(&i915->gem.contexts);
 }
 
 struct i915_gem_context *
@@ -76,8 +76,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 	struct i915_gem_context *ctx;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 35a40c2820a2..107eb03d6dd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -226,7 +226,13 @@ intel_context_init(struct intel_context *ce,
 	kref_init(&ce->ref);
 
 	ce->gem_context = ctx;
-	ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
+	rcu_read_lock();
+	ce->vm = rcu_dereference(ctx->vm);
+	if (ce->vm && !kref_get_unless_zero(&ce->vm->ref))
+		ce->vm = NULL;
+	if (!ce->vm)
+		ce->vm = i915_vm_get(&engine->gt->ggtt->vm);
+	rcu_read_unlock();
 	if (ctx->timeline)
 		ce->timeline = intel_timeline_get(ctx->timeline);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 890cd9ab07a0..b4637f0c8def 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -984,6 +984,11 @@ __execlists_schedule_in(struct i915_request *rq)
 		ce->lrc_desc |=
 			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
 			GEN11_SW_CTX_ID_SHIFT;
+
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
+/* in Gen12 ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID        (GEN11_MAX_CONTEXT_HW_ID - 1)
+
 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
 	}
 
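[ The GEN11/12 HW-ID limits gain a local definition here, next to
  their only remaining user; their old home in i915_drv.h is removed
  later in this patch. A condensed sketch of the tagging this
  BUILD_BUG_ON guards (not the literal driver code): each submission
  stamps a small rolling per-engine tag into the descriptor's SW
  context ID field, unless perf/OA pinned a fixed ce->tag (see the
  i915_perf.c hunk in the previous patch):

	if (!ce->tag)
		ce->lrc_desc |=
			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
			GEN11_SW_CTX_ID_SHIFT;
]
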
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 86cffbb0a9cb..7c838a57e174 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -155,13 +155,9 @@ static int live_context_size(void *arg)
 	 * HW tries to write past the end of one.
 	 */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	fixme = kernel_context(gt->i915);
-	if (IS_ERR(fixme)) {
-		err = PTR_ERR(fixme);
-		goto unlock;
-	}
+	if (IS_ERR(fixme))
+		return PTR_ERR(fixme);
 
 	for_each_engine(engine, gt->i915, id) {
 		struct {
@@ -201,8 +197,6 @@ static int live_context_size(void *arg)
 	}
 
 	kernel_context_close(fixme);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -305,12 +299,10 @@ static int live_active_context(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	fixme = live_context(gt->i915, file);
 	if (IS_ERR(fixme)) {
 		err = PTR_ERR(fixme);
-		goto unlock;
+		goto out_file;
 	}
 
 	for_each_engine(engine, gt->i915, id) {
@@ -323,8 +315,7 @@ static int live_active_context(void *arg)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
 	mock_file_free(gt->i915, file);
 	return err;
 }
@@ -418,12 +409,10 @@ static int live_remote_context(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	fixme = live_context(gt->i915, file);
 	if (IS_ERR(fixme)) {
 		err = PTR_ERR(fixme);
-		goto unlock;
+		goto out_file;
 	}
 
 	for_each_engine(engine, gt->i915, id) {
@@ -436,8 +425,7 @@ static int live_remote_context(void *arg)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
 	mock_file_free(gt->i915, file);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index b0ca30220193..ecb41cd2bb4f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -58,9 +58,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
 	memset(h, 0, sizeof(*h));
 	h->gt = gt;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	h->ctx = kernel_context(gt->i915);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(h->ctx))
 		return PTR_ERR(h->ctx);
 
@@ -133,7 +131,8 @@ static struct i915_request *
 hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct intel_gt *gt = h->gt;
-	struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(h->ctx->vm, true) ?: &engine->gt->ggtt->vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
@@ -382,9 +381,7 @@ static int igt_reset_nop(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out;
@@ -458,9 +455,7 @@ static int igt_reset_nop_engine(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out;
@@ -705,9 +700,7 @@ static int active_engine(void *data)
 		return PTR_ERR(file);
 
 	for (count = 0; count < ARRAY_SIZE(ctx); count++) {
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		ctx[count] = live_context(engine->i915, file);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 		if (IS_ERR(ctx[count])) {
 			err = PTR_ERR(ctx[count]);
 			while (--count)
@@ -1298,18 +1291,18 @@ static int igt_reset_evict_ppgtt(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out;
 	}
 
 	err = 0;
-	if (ctx->vm) /* aliasing == global gtt locking, covered above */
-		err = __igt_reset_evict_vma(gt, ctx->vm,
+	if (rcu_access_pointer(ctx->vm)) {
+		/* aliasing == global gtt locking, covered above */
+		err = __igt_reset_evict_vma(gt, rcu_dereference_protected(ctx->vm, true),
 					    evict_vma, EXEC_OBJECT_WRITE);
+	}
 
 out:
 	mock_file_free(gt->i915, file);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 7d3d9c53c416..0357cf643e3d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1635,7 +1635,9 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	int err = 0;
 
 	if (batch) {
-		vma = i915_vma_instance(batch, ctx->vm, NULL);
+		vma = i915_vma_instance(batch,
+					rcu_dereference_protected(ctx->vm, true),
+					NULL);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 4ee2e2babd0d..d17976d9fb91 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -33,6 +33,8 @@ struct wa_lists {
 	} engine[I915_NUM_ENGINES];
 };
 
+#define ctx_vm(ctx) rcu_dereference_protected((ctx)->vm, true)
+
 static void
 reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
 {
@@ -362,7 +364,7 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj, ctx_vm(ctx), NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
@@ -468,7 +470,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 	int err = 0, i, v;
 	u32 *cs, *results;
 
-	scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
+	scratch = create_scratch(ctx_vm(ctx), 2 * ARRAY_SIZE(values) + 1);
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
 
@@ -1018,14 +1020,14 @@ static int live_isolated_whitelist(void *arg)
 			goto err;
 		}
 
-		client[i].scratch[0] = create_scratch(c->vm, 1024);
+		client[i].scratch[0] = create_scratch(ctx_vm(c), 1024);
 		if (IS_ERR(client[i].scratch[0])) {
 			err = PTR_ERR(client[i].scratch[0]);
 			kernel_context_close(c);
 			goto err;
 		}
 
-		client[i].scratch[1] = create_scratch(c->vm, 1024);
+		client[i].scratch[1] = create_scratch(ctx_vm(c), 1024);
 		if (IS_ERR(client[i].scratch[1])) {
 			err = PTR_ERR(client[i].scratch[1]);
 			i915_vma_unpin_and_release(&client[i].scratch[0], 0);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 03f567084548..ce5cff01f44c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -365,7 +365,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 					  struct i915_gem_context *ctx)
 {
 	struct intel_vgpu_mm *mm = workload->shadow_mm;
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm);
+	struct i915_ppgtt *ppgtt =
+		i915_vm_to_ppgtt(rcu_dereference_protected(ctx->vm, true));
 	int i = 0;
 
 	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
@@ -1230,20 +1231,18 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
+	struct i915_ppgtt *ppgtt;
 	enum intel_engine_id i;
 	int ret;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX);
-	if (IS_ERR(ctx)) {
-		ret = PTR_ERR(ctx);
-		goto out_unlock;
-	}
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
 
 	i915_gem_context_set_force_single_submission(ctx);
 
-	i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
+	ppgtt = i915_vm_to_ppgtt(rcu_dereference_protected(ctx->vm, true));
+	i915_context_ppgtt_root_save(s, ppgtt);
 
 	for_each_engine(engine, i915, i) {
 		struct intel_context *ce;
@@ -1289,11 +1288,10 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
 	i915_gem_context_put(ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 out_shadow_ctx:
-	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm));
+	i915_context_ppgtt_root_restore(s, ppgtt);
 	for_each_engine(engine, i915, i) {
 		if (IS_ERR(s->shadow[i]))
 			break;
@@ -1302,8 +1300,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		intel_context_put(s->shadow[i]);
 	}
 	i915_gem_context_put(ctx);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 454de27c4912..6081a4539e1e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -316,12 +316,18 @@ static void print_context_stats(struct seq_file *m,
 				struct drm_i915_private *i915)
 {
 	struct file_stats kstats = {};
-	struct i915_gem_context *ctx;
+	struct i915_gem_context *ctx, *cn;
 
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
 			intel_context_lock_pinned(ce);
@@ -338,7 +344,9 @@ static void print_context_stats(struct seq_file *m,
 		i915_gem_context_unlock_engines(ctx);
 
 		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-			struct file_stats stats = { .vm = ctx->vm, };
+			struct file_stats stats = {
+				.vm = rcu_access_pointer(ctx->vm),
+			};
 			struct drm_file *file = ctx->file_priv->file;
 			struct task_struct *task;
 			char name[80];
@@ -355,7 +363,12 @@ static void print_context_stats(struct seq_file *m,
 
 			print_file_stats(m, name, stats);
 		}
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
 	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	print_file_stats(m, "[k]contexts", kstats);
 }
@@ -363,7 +376,6 @@ static void print_context_stats(struct seq_file *m,
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
-	int ret;
 
 	seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
 		   i915->mm.shrink_count,
@@ -372,12 +384,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 
 	seq_putc(m, '\n');
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	print_context_stats(m, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
@@ -1493,19 +1500,19 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 
 static int i915_context_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct i915_gem_context *ctx, *cn;
 
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		seq_puts(m, "HW context ");
 		if (ctx->pid) {
 			struct task_struct *task;
@@ -1540,9 +1547,12 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		i915_gem_context_unlock_engines(ctx);
 
 		seq_putc(m, '\n');
-	}
 
-	mutex_unlock(&dev->struct_mutex);
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
+	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 949f09bca8ba..9ac8d3b234c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1662,10 +1662,8 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	mutex_lock(&dev->struct_mutex);
 	i915_gem_context_close(file);
 	i915_gem_release(dev, file);
-	mutex_unlock(&dev->struct_mutex);
 
 	kfree_rcu(file_priv, rcu);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 79ddb44b834d..c91bdd8bd127 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1536,25 +1536,6 @@ struct drm_i915_private {
 	int audio_power_refcount;
 	u32 audio_freq_cntrl;
 
-	struct {
-		struct mutex mutex;
-		struct list_head list;
-		struct llist_head free_list;
-		struct work_struct free_work;
-
-		/* The hw wants to have a stable context identifier for the
-		 * lifetime of the context (for OA, PASID, faults, etc).
-		 * This is limited in execlists to 21 bits.
-		 */
-		struct ida hw_ida;
-#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
-#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
-/* in Gen12 ID 0x7FF is reserved to indicate idle */
-#define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
-		struct list_head hw_id_list;
-	} contexts;
-
 	u32 fdi_rx_config;
 
 	/* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */
@@ -1710,6 +1691,14 @@ struct drm_i915_private {
 
 	struct {
 		struct notifier_block pm_notifier;
+
+		struct i915_gem_contexts {
+			spinlock_t lock; /* locks list */
+			struct list_head list;
+
+			struct llist_head free_list;
+			struct work_struct free_work;
+		} contexts;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5a664bdead8c..f6db415985d5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1266,7 +1266,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
-	ret = i915_gem_contexts_init(dev_priv);
+	ret = i915_gem_init_contexts(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_scratch;
@@ -1348,7 +1348,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	}
 err_context:
 	if (ret != -EIO)
-		i915_gem_contexts_fini(dev_priv);
+		i915_gem_driver_release__contexts(dev_priv);
 err_scratch:
 	intel_gt_driver_release(&dev_priv->gt);
 err_unlock:
@@ -1416,11 +1416,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_engines_cleanup(dev_priv);
-	i915_gem_contexts_fini(dev_priv);
+	i915_gem_driver_release__contexts(dev_priv);
 	intel_gt_driver_release(&dev_priv->gt);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
 
@@ -1430,7 +1428,7 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 
 	i915_gem_drain_freed_objects(dev_priv);
 
-	WARN_ON(!list_empty(&dev_priv->contexts.list));
+	WARN_ON(!list_empty(&dev_priv->gem.contexts.list));
 }
 
 void i915_gem_init_mmio(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1d26634ca597..4dd667854d4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1366,7 +1366,9 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	if (vm->has_read_only &&
 	    vm->i915->kernel_context &&
 	    vm->i915->kernel_context->vm) {
-		struct i915_address_space *clone = vm->i915->kernel_context->vm;
+		struct i915_address_space *clone =
+			rcu_dereference_protected(vm->i915->kernel_context->vm,
+						  true);
 
 		GEM_BUG_ON(!clone->has_read_only);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ecfbc37b738b..231388d06c82 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1853,8 +1853,8 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	};
 #undef ctx_flexeuN
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
-	int i;
+	struct i915_gem_context *ctx, *cn;
+	int i, err;
 
 	for (i = 2; i < ARRAY_SIZE(regs); i++)
 		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
@@ -1877,16 +1877,27 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 * context. Contexts idle at the time of reconfiguration are not
 	 * trapped behind the barrier.
 	 */
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
-		int err;
-
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		if (ctx == i915->kernel_context)
 			continue;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
-		if (err)
+		if (err) {
+			i915_gem_context_put(ctx);
 			return err;
+		}
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
 	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	/*
 	 * After updating all other contexts, we need to modify ourselves.
@@ -1895,7 +1906,6 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 */
 	for_each_uabi_engine(engine, i915) {
 		struct intel_context *ce = engine->kernel_context;
-		int err;
 
 		if (engine->class != RENDER_CLASS)
 			continue;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index da0cfcc65802..2ca1013ae692 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -145,9 +145,9 @@ static const struct attribute_group media_rc6_attr_group = {
 };
 #endif
 
-static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset)
+static int l3_access_valid(struct drm_i915_private *i915, loff_t offset)
 {
-	if (!HAS_L3_DPF(dev_priv))
+	if (!HAS_L3_DPF(i915))
 		return -EPERM;
 
 	if (offset % 4 != 0)
@@ -165,31 +165,24 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 	     loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	int slice = (int)(uintptr_t)attr->private;
 	int ret;
 
-	count = round_down(count, 4);
-
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
+	count = round_down(count, 4);
 	count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count);
+	memset(buf, 0, count);
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
-	if (dev_priv->l3_parity.remap_info[slice])
+	spin_lock(&i915->gem.contexts.lock);
+	if (i915->l3_parity.remap_info[slice])
 		memcpy(buf,
-		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
+		       i915->l3_parity.remap_info[slice] + offset / 4,
 		       count);
-	else
-		memset(buf, 0, count);
-
-	mutex_unlock(&dev->struct_mutex);
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return count;
 }
@@ -200,46 +193,46 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	      loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	struct i915_gem_context *ctx;
 	int slice = (int)(uintptr_t)attr->private;
-	u32 **remap_info;
+	u32 *remap_info, *freeme = NULL;
 	int ret;
 
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
+	if (!remap_info)
+		return -ENOMEM;
 
-	remap_info = &dev_priv->l3_parity.remap_info[slice];
-	if (!*remap_info) {
-		*remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
-		if (!*remap_info) {
-			ret = -ENOMEM;
-			goto out;
-		}
+	spin_lock(&i915->gem.contexts.lock);
+
+	if (i915->l3_parity.remap_info[slice]) {
+		freeme = remap_info;
+		remap_info = i915->l3_parity.remap_info[slice];
+	} else {
+		i915->l3_parity.remap_info[slice] = remap_info;
 	}
 
-	/* TODO: Ideally we really want a GPU reset here to make sure errors
-	 * aren't propagated. Since I cannot find a stable way to reset the GPU
-	 * at this point it is left as a TODO.
-	*/
-	memcpy(*remap_info + (offset/4), buf, count);
+	count = round_down(count, 4);
+	memcpy(remap_info + offset / 4, buf, count);
 
 	/* NB: We defer the remapping until we switch to the context */
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link)
-		ctx->remap_slice |= (1<<slice);
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link)
+		ctx->remap_slice |= BIT(slice);
 
-	ret = count;
+	spin_unlock(&i915->gem.contexts.lock);
+	kfree(freeme);
 
-out:
-	mutex_unlock(&dev->struct_mutex);
+	/*
+	 * TODO: Ideally we really want a GPU reset here to make sure errors
+	 * aren't propagated. Since I cannot find a stable way to reset the GPU
+	 * at this point it is left as a TODO.
+	 */
 
-	return ret;
+	return count;
 }
 
 static const struct bin_attribute dpf_attrs = {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 1f2cf6cfafb5..7ef7a1e1664c 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -952,7 +952,7 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->vm = ctx->vm;
+			__entry->vm = rcu_access_pointer(ctx->vm);
 	),
 
 	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 0346c3e5b6b6..bfa40a5b6d98 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -138,11 +138,9 @@ static int igt_gem_suspend(void *arg)
 		return PTR_ERR(file);
 
 	err = -ENOMEM;
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (!IS_ERR(ctx))
 		err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err)
 		goto out;
 
@@ -157,9 +155,7 @@ static int igt_gem_suspend(void *arg)
 
 	pm_resume(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 out:
 	mock_file_free(i915, file);
 	return err;
@@ -177,11 +173,9 @@ static int igt_gem_hibernate(void *arg)
 		return PTR_ERR(file);
 
 	err = -ENOMEM;
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (!IS_ERR(ctx))
 		err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err)
 		goto out;
 
@@ -196,9 +190,7 @@ static int igt_gem_hibernate(void *arg)
 
 	pm_resume(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 out:
 	mock_file_free(i915, file);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index f39f0282e78c..0af9a58d011d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -473,7 +473,6 @@ static int igt_evict_contexts(void *arg)
 		}
 
 		count = 0;
-		mutex_lock(&i915->drm.struct_mutex);
 		onstack_fence_init(&fence);
 		do {
 			struct i915_request *rq;
@@ -510,8 +509,6 @@ static int igt_evict_contexts(void *arg)
 			count++;
 			err = 0;
 		} while(1);
-		mutex_unlock(&i915->drm.struct_mutex);
-
 		onstack_fence_fini(&fence);
 		pr_info("Submitted %lu contexts/requests on %s\n",
 			count, engine->name);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 9939dd40c6a8..bdd336a52f90 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1246,6 +1246,7 @@ static int exercise_mock(struct drm_i915_private *i915,
 				     unsigned long end_time))
 {
 	const u64 limit = totalram_pages() << PAGE_SHIFT;
+	struct i915_address_space *vm;
 	struct i915_gem_context *ctx;
 	IGT_TIMEOUT(end_time);
 	int err;
@@ -1254,7 +1255,8 @@ static int exercise_mock(struct drm_i915_private *i915,
 	if (!ctx)
 		return -ENOMEM;
 
-	err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time);
+	vm = rcu_dereference_protected(ctx->vm, true);
+	err = func(i915, vm, 0, min(vm->total, limit), end_time);
 
 	mock_context_close(ctx);
 	return err;
@@ -1798,7 +1800,7 @@ static int igt_cs_tlb(void *arg)
 		goto out_unlock;
 	}
 
-	vm = ctx->vm;
+	vm = rcu_dereference_protected(ctx->vm, true);
 	if (!vm)
 		goto out_unlock;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index d7d68c6a6bd5..247e3ba61948 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -181,9 +181,7 @@ static int igt_request_rewind(void *arg)
 	struct intel_context *ce;
 	int err = -EINVAL;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx[0] = mock_context(i915, "A");
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
@@ -197,9 +195,7 @@ static int igt_request_rewind(void *arg)
 	i915_request_get(request);
 	i915_request_add(request);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx[1] = mock_context(i915, "B");
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
@@ -438,9 +434,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
 	}
 
 	for (n = 0; n < t.ncontexts; n++) {
-		mutex_lock(&t.engine->i915->drm.struct_mutex);
 		t.contexts[n] = mock_context(t.engine->i915, "mock");
-		mutex_unlock(&t.engine->i915->drm.struct_mutex);
 		if (!t.contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
@@ -734,7 +728,8 @@ static int live_empty_request(void *arg)
 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx = i915->kernel_context;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(ctx->vm, true) ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	const int gen = INTEL_GEN(i915);
 	struct i915_vma *vma;
@@ -1220,9 +1215,7 @@ static int live_breadcrumbs_smoketest(void *arg)
 	}
 
 	for (n = 0; n < t[0].ncontexts; n++) {
-		mutex_lock(&i915->drm.struct_mutex);
 		t[0].contexts[n] = live_context(i915, file);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (!t[0].contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ac1ff558eb90..f6e6b0aae38c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -38,7 +38,7 @@ static bool assert_vma(struct i915_vma *vma,
 {
 	bool ok = true;
 
-	if (vma->vm != ctx->vm) {
+	if (vma->vm != rcu_access_pointer(ctx->vm)) {
 		pr_err("VMA created with wrong VM\n");
 		ok = false;
 	}
@@ -113,7 +113,8 @@ static int create_vmas(struct drm_i915_private *i915,
 	list_for_each_entry(obj, objects, st_link) {
 		for (pinned = 0; pinned <= 1; pinned++) {
 			list_for_each_entry(ctx, contexts, link) {
-				struct i915_address_space *vm = ctx->vm;
+				struct i915_address_space *vm =
+					rcu_dereference_protected(ctx->vm, true);
 				struct i915_vma *vma;
 				int err;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 4e6cde0d4859..335f37ba98de 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -59,11 +59,9 @@ static void mock_device_release(struct drm_device *dev)
 
 	i915_gem_drain_workqueue(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
-	i915_gem_contexts_fini(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_driver_release__contexts(i915);
 
 	intel_timelines_fini(i915);
 
@@ -206,7 +204,7 @@ struct drm_i915_private *mock_gem_device(void)
 	return i915;
 
 err_context:
-	i915_gem_contexts_fini(i915);
+	i915_gem_driver_release__contexts(i915);
 err_engine:
 	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
-- 
2.23.0

* [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (21 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 23/30] drm/i915: Move context management under GEM Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info Chris Wilson
                   ` (11 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

The overlay uses the modeset mutex to control itself and only required
the struct_mutex for requests; that dependence is now obsolete.
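
For illustration only, the disable path then leans on the modeset
locking alone; a minimal sketch of the resulting call site (the
verbatim change is in the diff below):

  static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
  {
          /* Serialised by the modeset locks held by our caller. */
          if (intel_crtc->overlay)
                  (void) intel_overlay_switch_off(intel_crtc->overlay);
  }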

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c |  7 +------
 drivers/gpu/drm/i915/display/intel_overlay.c | 13 -------------
 drivers/gpu/drm/i915/gt/intel_reset.c        |  4 ----
 3 files changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index cbc525103c69..891bcbba1f6b 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5738,13 +5738,8 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 
 static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
-	if (intel_crtc->overlay) {
-		struct drm_device *dev = intel_crtc->base.dev;
-
-		mutex_lock(&dev->struct_mutex);
+	if (intel_crtc->overlay)
 		(void) intel_overlay_switch_off(intel_crtc->overlay);
-		mutex_unlock(&dev->struct_mutex);
-	}
 
 	/* Let userspace switch the overlay on again. In most cases userspace
 	 * has to recompute where to put it anyway.
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index e12e1a753af0..daea112cbb87 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -439,8 +439,6 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 	struct i915_request *rq;
 	u32 *cs;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	/*
 	 * Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -751,7 +749,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	struct i915_vma *vma;
 	int ret, tmp_width;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
 	ret = intel_overlay_release_old_vid(overlay);
@@ -852,7 +849,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	struct drm_i915_private *dev_priv = overlay->i915;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
@@ -1068,11 +1064,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 
 	if (!(params->flags & I915_OVERLAY_ENABLE)) {
 		drm_modeset_lock_all(dev);
-		mutex_lock(&dev->struct_mutex);
-
 		ret = intel_overlay_switch_off(overlay);
-
-		mutex_unlock(&dev->struct_mutex);
 		drm_modeset_unlock_all(dev);
 
 		return ret;
@@ -1088,7 +1080,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	drm_modeset_lock_all(dev);
-	mutex_lock(&dev->struct_mutex);
 
 	if (i915_gem_object_is_tiled(new_bo)) {
 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
@@ -1152,14 +1143,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 	if (ret != 0)
 		goto out_unlock;
 
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 	i915_gem_object_put(new_bo);
 
 	return 0;
 
 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 	i915_gem_object_put(new_bo);
 
@@ -1233,7 +1222,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 	}
 
 	drm_modeset_lock_all(dev);
-	mutex_lock(&dev->struct_mutex);
 
 	ret = -EINVAL;
 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
@@ -1290,7 +1278,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 
 	ret = 0;
 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 806b668d89aa..dd372b372ee6 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1286,10 +1286,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
 	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
 		return -EIO;
 
-	/* XXX intel_reset_finish() still takes struct_mutex!!! */
-	if (mutex_is_locked(&gt->i915->drm.struct_mutex))
-		return -EAGAIN;
-
 	if (wait_event_interruptible(gt->reset.queue,
 				     !test_bit(I915_RESET_BACKOFF,
 					       &gt->reset.flags)))
-- 
2.23.0

* [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (22 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion Chris Wilson
                   ` (10 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

struct_mutex protects nothing that is accessed for the intel_framebuffer,
so the framebuffer's own locking had better be sufficient.
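
For reference, the walk over the framebuffers is already guarded by the
lock that owns them; a rough sketch of the locking that matters (the
fb_lock is visible in the function below):

  struct drm_framebuffer *drm_fb;

  mutex_lock(&dev->mode_config.fb_lock);
  drm_for_each_fb(drm_fb, dev) {
          /* describe the framebuffer; fb_lock owns this list */
  }
  mutex_unlock(&dev->mode_config.fb_lock);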

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6081a4539e1e..00c5e61bdc3a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1448,11 +1448,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_framebuffer *fbdev_fb = NULL;
 	struct drm_framebuffer *drm_fb;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) {
@@ -1487,7 +1482,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 	}
 	mutex_unlock(&dev->mode_config.fb_lock);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-- 
2.23.0

* [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (23 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore Chris Wilson
                   ` (9 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Holding struct_mutex around the read of a BIOS blob serves no purpose.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 00c5e61bdc3a..6a7e6bfa5818 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1414,21 +1414,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 static int i915_opregion(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_opregion *opregion = &dev_priv->opregion;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out;
+	struct intel_opregion *opregion = &node_to_i915(m->private)->opregion;
 
 	if (opregion->header)
 		seq_write(m, opregion->header, OPREGION_SIZE);
 
-	mutex_unlock(&dev->struct_mutex);
-
-out:
 	return 0;
 }
 
-- 
2.23.0

* [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (24 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 28/30] drm/i915/selftests: Drop vestigial struct_mutex guards Chris Wilson
                   ` (8 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

struct_mutex provides no serialisation of the registers and data
structures being saved and restored across suspend/resume. It is
completely superfluous here.
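
The state in question is a straight register dump into dev_priv->regfile
using plain mmio accessors, e.g. (one line from the existing save path):

  /* A plain register read into the regfile; no GEM state is touched. */
  dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i));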

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_suspend.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 2b2086def0f1..8812cdd9007f 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -66,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int i;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	i915_save_display(dev_priv);
 
 	if (IS_GEN(dev_priv, 4))
@@ -101,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv)
 			dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i));
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	return 0;
 }
 
@@ -111,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int i;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	if (IS_GEN(dev_priv, 4))
 		pci_write_config_word(pdev, GCDGMBUS,
 				      dev_priv->regfile.saveGCDGMBUS);
@@ -146,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 			I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]);
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	intel_gmbus_reset(dev_priv);
 
 	return 0;
-- 
2.23.0

* [PATCH 28/30] drm/i915/selftests: Drop vestigial struct_mutex guards
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (25 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation Chris Wilson
                   ` (7 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need struct_mutex to serialise request emission, so remove
it from the gt selftests.
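
Requests are now serialised by each context's own timeline, so the
selftests can emit them directly; a minimal sketch, assuming a pinned
intel_context *ce:

  struct i915_request *rq;

  rq = i915_request_create(ce); /* locks ce's timeline */
  if (IS_ERR(rq))
          return PTR_ERR(rq);
  i915_request_add(rq); /* unlocks it; no struct_mutex required */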

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  15 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |   4 -
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 -
 .../drm/i915/gem/selftests/i915_gem_phys.c    |   2 -
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 148 +++---------------
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  11 +-
 drivers/gpu/drm/i915/gt/uc/selftest_guc.c     |   4 -
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   2 -
 8 files changed, 27 insertions(+), 161 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index e204e653b459..75e717f7bb5a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1633,7 +1633,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
 	mkwrite_device_info(dev_priv)->ppgtt_size = 48;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	ppgtt = i915_ppgtt_create(dev_priv);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
@@ -1659,9 +1658,7 @@ int i915_gem_huge_page_mock_selftests(void)
 	i915_vm_put(&ppgtt->vm);
 
 out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	drm_dev_put(&dev_priv->drm);
-
 	return err;
 }
 
@@ -1677,7 +1674,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 	};
 	struct drm_file *file;
 	struct i915_gem_context *ctx;
-	intel_wakeref_t wakeref;
 	int err;
 
 	if (!HAS_PPGTT(i915)) {
@@ -1692,13 +1688,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	if (rcu_access_pointer(ctx->vm))
@@ -1706,11 +1699,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 
 	err = i915_subtests(tests, ctx);
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
+out_file:
 	mock_file_free(i915, file);
-
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index ffedc67f5c3d..15e81095b5d6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -307,9 +307,7 @@ static int live_parallel_switch(void *arg)
 		struct igt_live_test t;
 		int n;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = igt_live_test_begin(&t, i915, __func__, "");
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 
@@ -341,10 +339,8 @@ static int live_parallel_switch(void *arg)
 			data[n].tsk = NULL;
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
 out:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 1cd25cfd0246..cfa52c525691 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -669,9 +669,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto out;
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = make_obj_busy(obj);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
 			goto err_obj;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 94a15e3f6db8..34932871b3a5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -25,9 +25,7 @@ static int mock_phys_object(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err) {
 		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
 		goto out_obj;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 0357cf643e3d..177cb7f3cf92 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -26,17 +26,13 @@ static int live_sanitycheck(void *arg)
 	struct i915_gem_context *ctx;
 	struct intel_context *ce;
 	struct igt_spinner spin;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	ctx = kernel_context(i915);
 	if (!ctx)
@@ -73,9 +69,6 @@ static int live_sanitycheck(void *arg)
 	kernel_context_close(ctx);
 err_spin:
 	igt_spinner_fini(&spin);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -406,7 +399,6 @@ static int live_timeslice_preempt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void *vaddr;
 	int err = 0;
@@ -421,14 +413,9 @@ static int live_timeslice_preempt(void *arg)
 	 * ready task.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
@@ -473,10 +460,6 @@ static int live_timeslice_preempt(void *arg)
 	i915_gem_object_unpin_map(obj);
 err_obj:
 	i915_gem_object_put(obj);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -488,7 +471,6 @@ static int live_busywait_preempt(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	u32 *map;
 
@@ -497,12 +479,9 @@ static int live_busywait_preempt(void *arg)
 	 * preempt the busywaits used to synchronise between rings.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
-		goto err_unlock;
+		return -ENOMEM;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
@@ -656,9 +635,6 @@ static int live_busywait_preempt(void *arg)
 	kernel_context_close(ctx_lo);
 err_ctx_hi:
 	kernel_context_close(ctx_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -687,7 +663,6 @@ static int live_preempt(void *arg)
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -696,11 +671,8 @@ static int live_preempt(void *arg)
 	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
 		pr_err("Logical preemption supported, but not exposed\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (igt_spinner_init(&spin_lo, &i915->gt))
 		goto err_spin_hi;
@@ -780,9 +752,6 @@ static int live_preempt(void *arg)
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -794,17 +763,13 @@ static int live_late_preempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (igt_spinner_init(&spin_lo, &i915->gt))
 		goto err_spin_hi;
@@ -886,9 +851,6 @@ static int live_late_preempt(void *arg)
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -933,7 +895,6 @@ static int live_nopreempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -944,11 +905,8 @@ static int live_nopreempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &a))
-		goto err_unlock;
+		return -ENOMEM;
 	if (preempt_client_init(i915, &b))
 		goto err_client_a;
 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
@@ -1022,9 +980,6 @@ static int live_nopreempt(void *arg)
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1044,7 +999,6 @@ static int live_suppress_self_preempt(void *arg)
 	};
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -1063,11 +1017,8 @@ static int live_suppress_self_preempt(void *arg)
 	if (intel_vgpu_active(i915))
 		return 0; /* GVT forces single port & request submission */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &a))
-		goto err_unlock;
+		return -ENOMEM;
 	if (preempt_client_init(i915, &b))
 		goto err_client_a;
 
@@ -1148,9 +1099,6 @@ static int live_suppress_self_preempt(void *arg)
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1220,7 +1168,6 @@ static int live_suppress_wait_preempt(void *arg)
 	struct preempt_client client[4];
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	int i;
 
@@ -1233,11 +1180,8 @@ static int live_suppress_wait_preempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
-		goto err_unlock;
+		return -ENOMEM;
 	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
 		goto err_client_0;
 	if (preempt_client_init(i915, &client[2])) /* head of queue */
@@ -1323,9 +1267,6 @@ static int live_suppress_wait_preempt(void *arg)
 	preempt_client_fini(&client[1]);
 err_client_0:
 	preempt_client_fini(&client[0]);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1342,7 +1283,6 @@ static int live_chain_preempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct preempt_client hi, lo;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -1354,11 +1294,8 @@ static int live_chain_preempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &hi))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (preempt_client_init(i915, &lo))
 		goto err_client_hi;
@@ -1469,9 +1406,6 @@ static int live_chain_preempt(void *arg)
 	preempt_client_fini(&lo);
 err_client_hi:
 	preempt_client_fini(&hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1489,7 +1423,6 @@ static int live_preempt_hang(void *arg)
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -1498,11 +1431,8 @@ static int live_preempt_hang(void *arg)
 	if (!intel_has_reset_engine(&i915->gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (igt_spinner_init(&spin_lo, &i915->gt))
 		goto err_spin_hi;
@@ -1594,9 +1524,6 @@ static int live_preempt_hang(void *arg)
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1686,11 +1613,9 @@ static int smoke_crescendo_thread(void *arg)
 		struct i915_gem_context *ctx = smoke_context(smoke);
 		int err;
 
-		mutex_lock(&smoke->i915->drm.struct_mutex);
 		err = smoke_submit(smoke,
 				   ctx, count % I915_PRIORITY_MAX,
 				   smoke->batch);
-		mutex_unlock(&smoke->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -1711,8 +1636,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 	unsigned long count;
 	int err = 0;
 
-	mutex_unlock(&smoke->i915->drm.struct_mutex);
-
 	for_each_engine(engine, smoke->i915, id) {
 		arg[id] = *smoke;
 		arg[id].engine = engine;
@@ -1745,8 +1668,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 		put_task_struct(tsk[id]);
 	}
 
-	mutex_lock(&smoke->i915->drm.struct_mutex);
-
 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
 		count, flags,
 		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
@@ -1789,7 +1710,6 @@ static int live_preempt_smoke(void *arg)
 		.ncontext = 1024,
 	};
 	const unsigned int phase[] = { 0, BATCH };
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	int err = -ENOMEM;
 	u32 *cs;
@@ -1804,13 +1724,10 @@ static int live_preempt_smoke(void *arg)
 	if (!smoke.contexts)
 		return -ENOMEM;
 
-	mutex_lock(&smoke.i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
 	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
 		err = PTR_ERR(smoke.batch);
-		goto err_unlock;
+		goto err_free;
 	}
 
 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1857,9 +1774,7 @@ static int live_preempt_smoke(void *arg)
 
 err_batch:
 	i915_gem_object_put(smoke.batch);
-err_unlock:
-	intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
-	mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
 	kfree(smoke.contexts);
 
 	return err;
@@ -1997,19 +1912,17 @@ static int live_virtual_engine(void *arg)
 	struct intel_gt *gt = &i915->gt;
 	enum intel_engine_id id;
 	unsigned int class, inst;
-	int err = -ENODEV;
+	int err;
 
 	if (USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for_each_engine(engine, i915, id) {
 		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
 		if (err) {
 			pr_err("Failed to wrap engine %s: err=%d\n",
 			       engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 	}
 
@@ -2030,17 +1943,15 @@ static int live_virtual_engine(void *arg)
 			err = nop_virtual_engine(i915, siblings, nsibling,
 						 n, 0);
 			if (err)
-				goto out_unlock;
+				return err;
 		}
 
 		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int mask_virtual_engine(struct drm_i915_private *i915,
@@ -2119,9 +2030,6 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	}
 
 	err = igt_live_test_end(&t);
-	if (err)
-		goto out;
-
 out:
 	if (igt_flush_test(i915))
 		err = -EIO;
@@ -2144,13 +2052,11 @@ static int live_virtual_mask(void *arg)
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
 	if (USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		unsigned int nsibling;
 
@@ -2166,12 +2072,10 @@ static int live_virtual_mask(void *arg)
 
 		err = mask_virtual_engine(i915, siblings, nsibling);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int bond_virtual_engine(struct drm_i915_private *i915,
@@ -2322,13 +2226,11 @@ static int live_virtual_bond(void *arg)
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
 	if (USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		const struct phase *p;
 		int nsibling;
@@ -2351,14 +2253,12 @@ static int live_virtual_bond(void *arg)
 			if (err) {
 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
 				       __func__, p->name, class, nsibling, err);
-				goto out_unlock;
+				return err;
 			}
 		}
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d17976d9fb91..79ebe606987c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -704,9 +704,7 @@ static int live_dirty_whitelist(void *arg)
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
-	mutex_unlock(&i915->drm.struct_mutex);
 	file = mock_file(i915);
-	mutex_lock(&i915->drm.struct_mutex);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
 		goto out_rpm;
@@ -728,9 +726,7 @@ static int live_dirty_whitelist(void *arg)
 	}
 
 out_file:
-	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
-	mutex_lock(&i915->drm.struct_mutex);
 out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	return err;
@@ -1264,14 +1260,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_gpu_reset_workarounds),
 		SUBTEST(live_engine_reset_workarounds),
 	};
-	int err;
 
 	if (intel_gt_is_wedged(&i915->gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_subtests(tests, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return i915_subtests(tests, i915);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bba0eafe1cdb..f927f851aadf 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -116,7 +116,6 @@ static int igt_guc_clients(void *args)
 	int err = 0;
 
 	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	guc = &dev_priv->gt.uc.guc;
@@ -190,7 +189,6 @@ static int igt_guc_clients(void *args)
 	guc_clients_enable(guc);
 unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
 
@@ -208,7 +206,6 @@ static int igt_guc_doorbells(void *arg)
 	u16 db_id;
 
 	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	guc = &dev_priv->gt.uc.guc;
@@ -299,7 +296,6 @@ static int igt_guc_doorbells(void *arg)
 		}
 unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index bdd336a52f90..e00e21763fc3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1793,7 +1793,6 @@ static int igt_cs_tlb(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
@@ -2013,7 +2012,6 @@ static int igt_cs_tlb(void *arg)
 out_put_bbe:
 	i915_gem_object_put(bbe);
 out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
 	return err;
 }
-- 
2.23.0

* [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (26 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 28/30] drm/i915/selftests: Drop vestigial struct_mutex guards Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:20 ` [PATCH 30/30] drm/i915: Flush idle barriers when waiting Chris Wilson
                   ` (6 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need to placate lockdep by holding struct_mutex for our
initialisation, so don't.
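
Initialisation then simply brackets the hardware setup with forcewake;
a sketch of the resulting shape (see the diff below):

  intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
  /* ... ggtt, gt and context initialisation ... */
  intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);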

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c           | 2 --
 drivers/gpu/drm/i915/i915_gem.c                  | 9 ---------
 drivers/gpu/drm/i915/selftests/mock_gem_device.c | 7 -------
 3 files changed, 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index b32088a93232..0e97520cb1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -172,7 +172,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
 	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
 
 	if (intel_gt_init_hw(&i915->gt))
@@ -196,7 +195,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return;
 
 err_wedged:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6db415985d5..0ddbd3a5fb8d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1249,7 +1249,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 * we hold the forcewake during initialisation these problems
 	 * just magically go away.
 	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
 
 	ret = i915_init_ggtt(dev_priv);
@@ -1319,7 +1318,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_gt;
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 
@@ -1330,15 +1328,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 * driver doesn't explode during runtime.
 	 */
 err_gt:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	intel_gt_set_wedged_on_init(&dev_priv->gt);
 	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
 	i915_gem_drain_workqueue(dev_priv);
-
-	mutex_lock(&dev_priv->drm.struct_mutex);
 err_init_hw:
 	intel_uc_fini_hw(&dev_priv->gt.uc);
 err_uc_init:
@@ -1353,7 +1347,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_gt_driver_release(&dev_priv->gt);
 err_unlock:
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	if (ret != -EIO) {
 		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
@@ -1406,10 +1399,8 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 	/* Flush any outstanding unpin_work. */
 	i915_gem_drain_workqueue(dev_priv);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uc_fini_hw(&dev_priv->gt.uc);
 	intel_uc_fini(&dev_priv->gt.uc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 335f37ba98de..70a7026db08d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -68,10 +68,7 @@ static void mock_device_release(struct drm_device *dev)
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_fini_ggtt(&i915->ggtt);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	destroy_workqueue(i915->wq);
 
 	i915_gemfs_fini(i915);
@@ -179,8 +176,6 @@ struct drm_i915_private *mock_gem_device(void)
 
 	intel_timelines_init(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	mock_init_ggtt(i915, &i915->ggtt);
 
 	mkwrite_device_info(i915)->engine_mask = BIT(0);
@@ -197,7 +192,6 @@ struct drm_i915_private *mock_gem_device(void)
 		goto err_context;
 
 	intel_engines_driver_register(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	WARN_ON(i915_gemfs_init(i915));
 
@@ -208,7 +202,6 @@ struct drm_i915_private *mock_gem_device(void)
 err_engine:
 	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	intel_timelines_fini(i915);
 	destroy_workqueue(i915->wq);
 err_drv:
-- 
2.23.0

* [PATCH 30/30] drm/i915: Flush idle barriers when waiting
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (27 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation Chris Wilson
@ 2019-10-02 11:20 ` Chris Wilson
  2019-10-02 13:21 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore Patchwork
                   ` (5 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:20 UTC (permalink / raw)
  To: intel-gfx

If we do find ourselves with an idle barrier inside our i915_active
while waiting, attempt to flush it by emitting a pulse using the kernel
context.
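
The pulse is just an empty request on the engine's kernel context that
carries the idle barriers with it; roughly (a sketch of the helper
added below):

  rq = i915_request_create(engine->kernel_context);
  if (IS_ERR(rq))
          return PTR_ERR(rq);

  idle_pulse(engine, rq); /* attach the engine's idle barriers */
  i915_request_add(rq);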

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 14 +++++++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  1 +
 drivers/gpu/drm/i915/i915_active.c            | 21 +++++++++++++++++--
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index f68acf9118f3..e27bb7f028bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -169,3 +169,17 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 	intel_engine_pm_put(engine);
 	return err;
 }
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	idle_pulse(engine, rq);
+	i915_request_add(rq);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index 39391004554d..0c1ad0fc091d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -15,5 +15,6 @@ void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
 
 int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index aa37c07004b9..98d5fe1c7e19 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -6,6 +6,7 @@
 
 #include <linux/debugobjects.h>
 
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_pm.h"
 
 #include "i915_drv.h"
@@ -435,6 +436,21 @@ static void enable_signaling(struct i915_active_fence *active)
 	dma_fence_put(fence);
 }
 
+static int flush_barrier(struct active_node *it)
+{
+	struct intel_engine_cs *engine;
+
+	if (!is_barrier(&it->base))
+		return 0;
+
+	engine = __barrier_to_engine(it);
+	smp_rmb(); /* serialise with add_active_barriers */
+	if (!is_barrier(&it->base))
+		return 0;
+
+	return intel_engine_flush_barriers(engine);
+}
+
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
@@ -448,8 +464,9 @@ int i915_active_wait(struct i915_active *ref)
 	/* Flush lazy signals */
 	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) /* unconnected idle barrier */
-			continue;
+		err = flush_barrier(it); /* unconnected idle barrier? */
+		if (err)
+			break;
 
 		enable_signaling(&it->base);
 	}
-- 
2.23.0

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (28 preceding siblings ...)
  2019-10-02 11:20 ` [PATCH 30/30] drm/i915: Flush idle barriers when waiting Chris Wilson
@ 2019-10-02 13:21 ` Patchwork
  2019-10-02 13:34 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (4 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 13:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
099f6ae3c72d drm/i915/selftests: Exercise potential false lite-restore
48a5a08e3523 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked with dma_fence_signal")'
#14: 
See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked

total: 1 errors, 0 warnings, 0 checks, 24 lines checked
e93a397fa0d2 drm/mm: Pack allocated/scanned boolean into a bitfield
6642ae7aa59e drm/i915: Only track bound elements of the GTT
24c5d7101d43 drm/i915: Mark up address spaces that may need to allocate
4780268eb9ee drm/i915: Pull i915_vma_pin under the vm->mutex
ac368d894b03 drm/i915: Push the i915_active.retire into a worker
599d9ac80be6 drm/i915: Coordinate i915_active with its own mutex
-:1339: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#1339: FILE: drivers/gpu/drm/i915/i915_active_types.h:49:
+	struct mutex mutex;

total: 0 errors, 0 warnings, 1 checks, 1481 lines checked
609a95b5ebdd drm/i915: Move idle barrier cleanup into engine-pm
3e257cd827c5 drm/i915: Drop struct_mutex from around i915_retire_requests()
b34f4178512d drm/i915: Remove the GEM idle worker
549ca7d3ccb8 drm/i915: Merge wait_for_timelines with retire_request
5f78b2d8eaa1 drm/i915/gem: Retire directly for mmap-offset shrinking
0f64ec7c4dc9 drm/i915: Move request runtime management onto gt
-:235: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#235: 
new file mode 100644

-:240: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#240: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:1:
+/*

-:241: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#241: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:2:
+ * SPDX-License-Identifier: MIT

-:369: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#369: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:1:
+/*

-:370: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#370: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 725 lines checked
d806a7455a70 drm/i915: Move global activity tracking from GEM to GT
ed2665cd47ce drm/i915: Expose engine properties via sysfs
-:68: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#68: 
new file mode 100644

-:73: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#73: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:1:
+/*

-:74: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#74: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:2:
+ * SPDX-License-Identifier: MIT

-:198: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#198: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:1:
+/*

-:199: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#199: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 158 lines checked
a1ec3bd4b994 drm/i915/execlists: Force preemption
7392453b5491 drm/i915: Mark up "sentinel" requests
a84e25080394 drm/i915/execlists: Cancel banned contexts on schedule-out
36c0ae8588b3 drm/i915: Cancel non-persistent contexts on close
-:282: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#282: 
new file mode 100644

-:287: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#287: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:1:
+/*

-:288: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#288: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:2:
+ * SPDX-License-Identifier: MIT

-:349: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#349: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:1:
+/*

-:350: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#350: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 319 lines checked
01065375a0b4 drm/i915: Replace hangcheck by heartbeats
-:236: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#236: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:69:
+					  "%s heartbeat not ticking\n",

-:253: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#253: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:86:
+					      "stopped heartbeat on %s",

-:540: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#540: 
deleted file mode 100644

total: 0 errors, 3 warnings, 0 checks, 672 lines checked
1c94e090a34e drm/i915: Remove logical HW ID
1e6d926f34c5 drm/i915: Move context management under GEM
-:1061: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#1061: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:988:
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
                                   ^

total: 0 errors, 0 warnings, 1 checks, 1717 lines checked
e9936b3ae50f drm/i915/overlay: Drop struct_mutex guard
8fc7dc0d282a drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
e472c60fd9d7 drm/i915: Remove struct_mutex guard for debugfs/opregion
e5dc8b772718 drm/i915: Drop struct_mutex from suspend state save/restore
132ce69b74db drm/i915/selftests: Drop vestigal struct_mutex guards
277ca2deb924 drm/i915: Drop struct_mutex from around GEM initialisation
b88fbb322e7a drm/i915: Flush idle barriers when waiting


* ✗ Fi.CI.SPARSE: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (29 preceding siblings ...)
  2019-10-02 13:21 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore Patchwork
@ 2019-10-02 13:34 ` Patchwork
  2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (3 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 13:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.6.0
Commit: drm/i915/selftests: Exercise potential false lite-restore
Okay!

Commit: dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
Okay!

Commit: drm/mm: Pack allocated/scanned boolean into a bitfield
Okay!

Commit: drm/i915: Only track bound elements of the GTT
Okay!

Commit: drm/i915: Mark up address spaces that may need to allocate
Okay!

Commit: drm/i915: Pull i915_vma_pin under the vm->mutex
Okay!

Commit: drm/i915: Push the i915_active.retire into a worker
Okay!

Commit: drm/i915: Coordinate i915_active with its own mutex
Okay!

Commit: drm/i915: Move idle barrier cleanup into engine-pm
Okay!

Commit: drm/i915: Drop struct_mutex from around i915_retire_requests()
Okay!

Commit: drm/i915: Remove the GEM idle worker
Okay!

Commit: drm/i915: Merge wait_for_timelines with retire_request
Okay!

Commit: drm/i915/gem: Retire directly for mmap-offset shrinking
Okay!

Commit: drm/i915: Move request runtime management onto gt
Okay!

Commit: drm/i915: Move global activity tracking from GEM to GT
Okay!

Commit: drm/i915: Expose engine properties via sysfs
Okay!

Commit: drm/i915/execlists: Force preemption
Okay!

Commit: drm/i915: Mark up "sentinel" requests
Okay!

Commit: drm/i915/execlists: Cancel banned contexts on schedule-out
Okay!

Commit: drm/i915: Cancel non-persistent contexts on close
Okay!

Commit: drm/i915: Replace hangcheck by heartbeats
Okay!

Commit: drm/i915: Remove logical HW ID
Okay!

Commit: drm/i915: Move context management under GEM
Okay!

Commit: drm/i915/overlay: Drop struct_mutex guard
Okay!

Commit: drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
Okay!


* ✗ Fi.CI.BAT: failure for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (30 preceding siblings ...)
  2019-10-02 13:34 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-10-02 13:46 ` Patchwork
  2019-10-02 13:49   ` Chris Wilson
  2019-10-02 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2) Patchwork
                   ` (2 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Patchwork @ 2019-10-02 13:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
URL   : https://patchwork.freedesktop.org/series/67483/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6988 -> Patchwork_14625
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14625 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14625, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14625:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-bdw-gvtdvm:      NOTRUN -> [INCOMPLETE][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-skl-guc:         [PASS][2] -> [INCOMPLETE][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-skl-guc/igt@i915_selftest@live_execlists.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-guc/igt@i915_selftest@live_execlists.html
    - fi-kbl-guc:         [PASS][4] -> [INCOMPLETE][5]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-kbl-guc/igt@i915_selftest@live_execlists.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-kbl-guc/igt@i915_selftest@live_execlists.html
    - fi-skl-6600u:       NOTRUN -> [INCOMPLETE][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6600u/igt@i915_selftest@live_execlists.html
    - fi-bsw-n3050:       [PASS][7] -> [INCOMPLETE][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
    - fi-bsw-kefka:       [PASS][9] -> [INCOMPLETE][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
    - fi-skl-6260u:       NOTRUN -> [INCOMPLETE][11]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6260u/igt@i915_selftest@live_execlists.html
    - fi-bdw-5557u:       [PASS][12] -> [INCOMPLETE][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-5557u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - fi-bdw-gvtdvm:      NOTRUN -> [FAIL][14]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-gvtdvm/igt@runner@aborted.html
    - fi-bsw-n3050:       NOTRUN -> [FAIL][15]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-n3050/igt@runner@aborted.html
    - fi-bsw-kefka:       NOTRUN -> [FAIL][16]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-kefka/igt@runner@aborted.html
    - fi-bdw-5557u:       NOTRUN -> [FAIL][17]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-5557u/igt@runner@aborted.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live_execlists:
    - {fi-tgl-u}:         NOTRUN -> [INCOMPLETE][18]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-tgl-u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - {fi-tgl-u}:         NOTRUN -> [FAIL][19]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-tgl-u/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_14625 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_switch@legacy-render:
    - fi-cml-u2:          [PASS][20] -> [INCOMPLETE][21] ([fdo#110566] / [fdo#111381])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-cml-u2/igt@gem_ctx_switch@legacy-render.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-cml-u2/igt@gem_ctx_switch@legacy-render.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][22] -> [INCOMPLETE][23] ([fdo#107718])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_flink_basic@basic:
    - fi-icl-u3:          [PASS][24] -> [DMESG-WARN][25] ([fdo#107724]) +1 similar issue
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u3/igt@gem_flink_basic@basic.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u3/igt@gem_flink_basic@basic.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-u3:          [PASS][26] -> [INCOMPLETE][27] ([fdo#107713])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u3/igt@i915_selftest@live_execlists.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u3/igt@i915_selftest@live_execlists.html
    - fi-glk-dsi:         [PASS][28] -> [INCOMPLETE][29] ([fdo#103359] / [k.org#198133])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-glk-dsi/igt@i915_selftest@live_execlists.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-glk-dsi/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_gem:
    - fi-icl-u2:          [PASS][30] -> [INCOMPLETE][31] ([fdo#107713])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u2/igt@i915_selftest@live_gem.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u2/igt@i915_selftest@live_gem.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-apl-guc:         [DMESG-WARN][32] ([fdo#109385] / [fdo#111652]) -> [PASS][33]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-apl-guc/igt@gem_exec_gttfill@basic.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-apl-guc/igt@gem_exec_gttfill@basic.html

  * igt@gem_exec_parallel@basic:
    - {fi-tgl-u2}:        [INCOMPLETE][34] -> [PASS][35]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-tgl-u2/igt@gem_exec_parallel@basic.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-tgl-u2/igt@gem_exec_parallel@basic.html

  * igt@i915_module_load@reload-with-fault-injection:
    - {fi-icl-guc}:       [DMESG-WARN][36] ([fdo#106107]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-guc/igt@i915_module_load@reload-with-fault-injection.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-guc/igt@i915_module_load@reload-with-fault-injection.html

  * igt@kms_chamelium@hdmi-edid-read:
    - {fi-icl-u4}:        [FAIL][38] ([fdo#111045]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u4/igt@kms_chamelium@hdmi-edid-read.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u4/igt@kms_chamelium@hdmi-edid-read.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-icl-u2:          [FAIL][40] ([fdo#109483]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
    - {fi-icl-u4}:        [FAIL][42] ([fdo#111045] / [fdo#111096]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u4/igt@kms_chamelium@hdmi-hpd-fast.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u4/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - {fi-icl-dsi}:       [DMESG-WARN][44] ([fdo#106107]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-dsi/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-dsi/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@prime_self_import@basic-llseek-size:
    - fi-icl-u3:          [DMESG-WARN][46] ([fdo#107724]) -> [PASS][47] +2 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u3/igt@prime_self_import@basic-llseek-size.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u3/igt@prime_self_import@basic-llseek-size.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109385]: https://bugs.freedesktop.org/show_bug.cgi?id=109385
  [fdo#109483]: https://bugs.freedesktop.org/show_bug.cgi?id=109483
  [fdo#110566]: https://bugs.freedesktop.org/show_bug.cgi?id=110566
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111381]: https://bugs.freedesktop.org/show_bug.cgi?id=111381
  [fdo#111652]: https://bugs.freedesktop.org/show_bug.cgi?id=111652
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (49 -> 46)
------------------------------

  Additional (4): fi-bdw-gvtdvm fi-tgl-u fi-skl-6260u fi-skl-6600u 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6988 -> Patchwork_14625

  CI-20190529: 20190529
  CI_DRM_6988: a41f194b3d8797220fd614bfd13df8752507bb27 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5209: ec639c89860b859fdf4b038c2fa8ad593bd6909e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14625: b88fbb322e7a7db062a673fd2af3b85e6aa7f36c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

b88fbb322e7a drm/i915: Flush idle barriers when waiting
277ca2deb924 drm/i915: Drop struct_mutex from around GEM initialisation
132ce69b74db drm/i915/selftests: Drop vestigal struct_mutex guards
e5dc8b772718 drm/i915: Drop struct_mutex from suspend state save/restore
e472c60fd9d7 drm/i915: Remove struct_mutex guard for debugfs/opregion
8fc7dc0d282a drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
e9936b3ae50f drm/i915/overlay: Drop struct_mutex guard
1e6d926f34c5 drm/i915: Move context management under GEM
1c94e090a34e drm/i915: Remove logical HW ID
01065375a0b4 drm/i915: Replace hangcheck by heartbeats
36c0ae8588b3 drm/i915: Cancel non-persistent contexts on close
a84e25080394 drm/i915/execlists: Cancel banned contexts on schedule-out
7392453b5491 drm/i915: Mark up "sentinel" requests
a1ec3bd4b994 drm/i915/execlists: Force preemption
ed2665cd47ce drm/i915: Expose engine properties via sysfs
d806a7455a70 drm/i915: Move global activity tracking from GEM to GT
0f64ec7c4dc9 drm/i915: Move request runtime management onto gt
5f78b2d8eaa1 drm/i915/gem: Retire directly for mmap-offset shrinking
549ca7d3ccb8 drm/i915: Merge wait_for_timelines with retire_request
b34f4178512d drm/i915: Remove the GEM idle worker
3e257cd827c5 drm/i915: Drop struct_mutex from around i915_retire_requests()
609a95b5ebdd drm/i915: Move idle barrier cleanup into engine-pm
599d9ac80be6 drm/i915: Coordinate i915_active with its own mutex
ac368d894b03 drm/i915: Push the i915_active.retire into a worker
4780268eb9ee drm/i915: Pull i915_vma_pin under the vm->mutex
24c5d7101d43 drm/i915: Mark up address spaces that may need to allocate
6642ae7aa59e drm/i915: Only track bound elements of the GTT
e93a397fa0d2 drm/mm: Pack allocated/scanned boolean into a bitfield
48a5a08e3523 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
099f6ae3c72d drm/i915/selftests: Exercise potential false lite-restore

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/index.html

* Re: ✗ Fi.CI.BAT: failure for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-10-02 13:49   ` Chris Wilson
  0 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 13:49 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-10-02 14:46:05)
> == Series Details ==
> 
> Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
> URL   : https://patchwork.freedesktop.org/series/67483/
> State : failure
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_6988 -> Patchwork_14625
> ====================================================
> 
> Summary
> -------
> 
>   **FAILURE**
> 
>   Serious unknown changes coming with Patchwork_14625 absolutely need to be
>   verified manually.
>   
>   If you think the reported changes have nothing to do with the changes
>   introduced in Patchwork_14625, please notify your bug team to allow them
>   to document this new failure mode, which will reduce false positives in CI.
> 
>   External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/index.html
> 
> Possible new issues
> -------------------
> 
>   Here are the unknown changes that may have been introduced in Patchwork_14625:
> 
> ### IGT changes ###
> 
> #### Possible regressions ####
> 
>   * igt@i915_selftest@live_execlists:
>     - fi-bdw-gvtdvm:      NOTRUN -> [INCOMPLETE][1]
>    [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
>     - fi-skl-guc:         [PASS][2] -> [INCOMPLETE][3]
>    [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-skl-guc/igt@i915_selftest@live_execlists.html
>    [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-guc/igt@i915_selftest@live_execlists.html
>     - fi-kbl-guc:         [PASS][4] -> [INCOMPLETE][5]
>    [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-kbl-guc/igt@i915_selftest@live_execlists.html
>    [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-kbl-guc/igt@i915_selftest@live_execlists.html
>     - fi-skl-6600u:       NOTRUN -> [INCOMPLETE][6]
>    [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6600u/igt@i915_selftest@live_execlists.html
>     - fi-bsw-n3050:       [PASS][7] -> [INCOMPLETE][8]
>    [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
>    [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
>     - fi-bsw-kefka:       [PASS][9] -> [INCOMPLETE][10]
>    [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
>    [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
>     - fi-skl-6260u:       NOTRUN -> [INCOMPLETE][11]
>    [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6260u/igt@i915_selftest@live_execlists.html
>     - fi-bdw-5557u:       [PASS][12] -> [INCOMPLETE][13]
>    [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
>    [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-5557u/igt@i915_selftest@live_execlists.html

I got bored of waiting for trybot for the unlite test results.
-Chris

* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
@ 2019-10-02 13:52   ` Bloomfield, Jon
  2019-10-02 14:05     ` Chris Wilson
  2019-10-02 14:23     ` Chris Wilson
  2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
  1 sibling, 2 replies; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 13:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 4:20 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> 
> Normally, we rely on our hangcheck to prevent persistent batches from
> hogging the GPU. However, if the user disables hangcheck, this mechanism
> breaks down. Despite our insistence that this is unsafe, the users are
> equally insistent that they want to use endless batches and will disable
> the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> with a softer mechanism, that sends a pulse down the engine to check if
> it is well. We can use the same preemptive pulse to flush an active
> persistent context off the GPU upon context close, preventing resources
> being lost and unkillable requests remaining on the GPU after process
> termination. To avoid changing the ABI and accidentally breaking
> existing userspace, we make the persistence of a context explicit and
> enable it by default (matching current ABI). Userspace can opt out of
> persistent mode (forcing requests to be cancelled when the context is
> closed by process termination or explicitly) by a context parameter. To
> facilitate existing use-cases of disabling hangcheck, if the modparam is
> disabled (i915.enable_hangcheck=0), we disable persistence mode by
> default.  (Note, one of the outcomes for supporting endless mode will be
> the removal of hangchecking, at which point opting into persistent mode
> will be mandatory, or maybe the default perhaps controlled by cgroups.)
> 
> Testcase: igt/gem_ctx_persistence
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
>  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
>  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
>  .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
>  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
>  include/uapi/drm/i915_drm.h                   |  15 +++
>  10 files changed, 228 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 1f87c70a2842..561fa2bb3006 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -76,6 +76,7 @@ gt-y += \
>  	gt/intel_breadcrumbs.o \
>  	gt/intel_context.o \
>  	gt/intel_engine_cs.o \
> +	gt/intel_engine_heartbeat.o \
>  	gt/intel_engine_pm.o \
>  	gt/intel_engine_pool.o \
>  	gt/intel_engine_sysfs.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 0ab416887fc2..e832238a72e5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c

<SNIP>

> +static int
> +set_persistence(struct i915_gem_context *ctx,
> +		const struct drm_i915_gem_context_param *args)
> +{
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (args->value) {
> +		i915_gem_context_set_persistence(ctx);
> +		return 0;
> +	}
> +
> +	/* To cancel a context we use "preempt-to-idle" */
> +	if (!(ctx->i915->caps.scheduler &
> I915_SCHEDULER_CAP_PREEMPTION))
> +		return -ENODEV;
> +
> +	i915_gem_context_clear_persistence(ctx);
> +	return 0;
> +}

Hmmn. Given that disabling hangcheck is an explicit operation, and we
already change the default setting, can't we make it a hard requirement
that persistence requires hangcheck? You should not really be able to
opt back in to persistence if hangcheck is disabled. In fact you could
just test for hangcheck when deciding whether to kill the context, and
force-kill if it is off - that way if hangcheck is disabled after a
context starts it will get cleaned up.



* Re: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
  2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
@ 2019-10-02 13:55   ` Bloomfield, Jon
  2019-10-02 14:28     ` Chris Wilson
  0 siblings, 1 reply; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 13:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 4:20 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Ursulin, Tvrtko <tvrtko.ursulin@intel.com>;
> Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
> 
> Replace sampling the engine state every so often with a periodic
> heartbeat request to measure the health of an engine. This is coupled
> with the forced-preemption to allow long running requests to survive so
> long as they do not block other users.
> 
> The heartbeat interval can be adjusted per-engine using,
> 
> 	/sys/class/drm/card0/engine/*/heartbeat_interval_ms

Not true for discrete :-)

Perhaps: /sys/class/drm/card<n>/engine/*/heartbeat_interval_ms
And again in the code below.
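
For illustration, tweaking it from userspace is just a sysfs write; a
quick sketch in C, where the card index and engine name are exactly
the placeholders in question (substitute the right cardN/engine):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* illustrative path only; bump the interval to 2500ms */
		int fd = open("/sys/class/drm/card0/engine/rcs0/heartbeat_interval_ms",
			      O_WRONLY);
		if (fd < 0)
			return 1;
		write(fd, "2500", 4);
		return close(fd);
	}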



* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 13:52   ` Bloomfield, Jon
@ 2019-10-02 14:05     ` Chris Wilson
  2019-10-02 14:23     ` Chris Wilson
  1 sibling, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:05 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-10-02 14:52:32)
> 
> 
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: Wednesday, October 2, 2019 4:20 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> > <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> > Subject: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> > 
> > Normally, we rely on our hangcheck to prevent persistent batches from
> > hogging the GPU. However, if the user disables hangcheck, this mechanism
> > breaks down. Despite our insistence that this is unsafe, the users are
> > equally insistent that they want to use endless batches and will disable
> > the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> > with a softer mechanism, that sends a pulse down the engine to check if
> > it is well. We can use the same preemptive pulse to flush an active
> > persistent context off the GPU upon context close, preventing resources
> > being lost and unkillable requests remaining on the GPU after process
> > termination. To avoid changing the ABI and accidentally breaking
> > existing userspace, we make the persistence of a context explicit and
> > enable it by default (matching current ABI). Userspace can opt out of
> > persistent mode (forcing requests to be cancelled when the context is
> > closed by process termination or explicitly) by a context parameter. To
> > facilitate existing use-cases of disabling hangcheck, if the modparam is
> > disabled (i915.enable_hangcheck=0), we disable persistence mode by
> > default.  (Note, one of the outcomes for supporting endless mode will be
> > the removal of hangchecking, at which point opting into persistent mode
> > will be mandatory, or maybe the default perhaps controlled by cgroups.)
> > 
> > Testcase: igt/gem_ctx_persistence
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> > Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 |   1 +
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
> >  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
> >  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
> >  .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
> >  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
> >  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
> >  include/uapi/drm/i915_drm.h                   |  15 +++
> >  10 files changed, 228 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 1f87c70a2842..561fa2bb3006 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -76,6 +76,7 @@ gt-y += \
> >       gt/intel_breadcrumbs.o \
> >       gt/intel_context.o \
> >       gt/intel_engine_cs.o \
> > +     gt/intel_engine_heartbeat.o \
> >       gt/intel_engine_pm.o \
> >       gt/intel_engine_pool.o \
> >       gt/intel_engine_sysfs.o \
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 0ab416887fc2..e832238a72e5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> 
> <SNIP>
> 
> > +static int
> > +set_persistence(struct i915_gem_context *ctx,
> > +             const struct drm_i915_gem_context_param *args)
> > +{
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     if (args->value) {
> > +             i915_gem_context_set_persistence(ctx);
> > +             return 0;
> > +     }
> > +
> > +     /* To cancel a context we use "preempt-to-idle" */
> > +     if (!(ctx->i915->caps.scheduler &
> > I915_SCHEDULER_CAP_PREEMPTION))
> > +             return -ENODEV;
> > +
> > +     i915_gem_context_clear_persistence(ctx);
> > +     return 0;
> > +}
> 
> Hmmn. Given that disabling hangcheck is an explicit operation, and we already change the default setting, can't we make it a hard requirement that persistence requires hangcheck? You should not really be able to opt back in to persistence if hangcheck is disabled. In fact you could just test for hangcheck when deciding whether to kill the context, and force-kill if it is off - that way if hangcheck is disabled after a context starts it will get cleaned up.

I hear the toctou argument, but I really, really want to avoid coupling
in the modparam as much as possible (I'd rather kill off the modparam
entirely for being too coarse).

But certainly having a check here saying you cannot re-enable
persistence without hangcheck seems valid. Let's see how that looks.
(That takes a bit of lalala to ignore the toctou.)
-Chris

* [PATCH v2] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
  2019-10-02 13:52   ` Bloomfield, Jon
@ 2019-10-02 14:23   ` Chris Wilson
  2019-10-02 14:36     ` Bloomfield, Jon
  1 sibling, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:23 UTC (permalink / raw)
  To: intel-gfx

Normally, we rely on our hangcheck to prevent persistent batches from
hogging the GPU. However, if the user disables hangcheck, this mechanism
breaks down. Despite our insistence that this is unsafe, the users are
equally insistent that they want to use endless batches and will disable
the hangcheck mechanism. We are looking at perhaps replacing hangcheck
with a softer mechanism that sends a pulse down the engine to check if
it is well. We can use the same preemptive pulse to flush an active
persistent context off the GPU upon context close, preventing resources
being lost and unkillable requests remaining on the GPU after process
termination. To avoid changing the ABI and accidentally breaking
existing userspace, we make the persistence of a context explicit and
enable it by default (matching current ABI). Userspace can opt out of
persistent mode (forcing requests to be cancelled when the context is
closed by process termination or explicitly) by a context parameter. To
facilitate existing use-cases of disabling hangcheck, if the modparam is
disabled (i915.enable_hangcheck=0), we disable persistence mode by
default.  (Note, one of the outcomes for supporting endless mode will be
the removal of hangchecking, at which point opting into persistent mode
will be mandatory, or maybe the default, perhaps controlled by cgroups.)

v2: Check for hangchecking at context termination, so that we are not
left with undying contexts from a crafty user.
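
For reference, opting out from userspace is a single SETPARAM call; a
minimal sketch (error handling elided; the drmIoctl() wrapper and
headers from libdrm are assumed):

	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static void set_nonpersistent(int i915, __u32 ctx_id)
	{
		struct drm_i915_gem_context_param arg = {
			.ctx_id = ctx_id,
			.size = 0, /* must be 0, or setparam returns -EINVAL */
			.param = I915_CONTEXT_PARAM_PERSISTENCE,
			.value = 0, /* cancel outstanding requests on close */
		};

		/* fails with -ENODEV if preempt-to-idle is unsupported */
		drmIoctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}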

Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 132 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 ++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
 drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
 include/uapi/drm/i915_drm.h                   |  15 ++
 10 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1f87c70a2842..561fa2bb3006 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,6 +76,7 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
 	gt/intel_engine_pool.o \
 	gt/intel_engine_sysfs.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0ab416887fc2..172fe95d863e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
@@ -375,6 +376,78 @@ void i915_gem_context_release(struct kref *ref)
 		queue_work(i915->wq, &i915->contexts.free_work);
 }
 
+static inline struct i915_gem_engines *
+__context_engines_static(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	intel_engine_mask_t tmp, active, reset;
+	struct intel_gt *gt = &ctx->i915->gt;
+	struct i915_gem_engines_iter it;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	/*
+	 * If we are already banned, it was due to a guilty request causing
+	 * a reset and the entire context being evicted from the GPU.
+	 */
+	if (i915_gem_context_is_banned(ctx))
+		return;
+
+	i915_gem_context_set_banned(ctx);
+
+	/*
+	 * Map the user's engine back to the actual engines; one virtual
+	 * engine will be mapped to multiple engines, and using ctx->engine[]
+	 * the same engine may have multiple instances in the user's map.
+	 * However, we only care about pending requests, so only include
+	 * engines on which there are incomplete requests.
+	 */
+	active = 0;
+	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+		struct dma_fence *fence;
+
+		if (!ce->timeline)
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (!fence)
+			continue;
+
+		engine = to_request(fence)->engine;
+		if (HAS_EXECLISTS(gt->i915))
+			engine = intel_context_inflight(ce);
+		if (engine)
+			active |= engine->mask;
+
+		dma_fence_put(fence);
+	}
+
+	/*
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 */
+	reset = 0;
+	for_each_engine_masked(engine, gt->i915, active, tmp)
+		if (intel_engine_pulse(engine))
+			reset |= engine->mask;
+
+	/*
+	 * If we are unable to send a preemptive pulse to bump
+	 * the context from the GPU, we have to resort to a full
+	 * reset. We hope the collateral damage is worth it.
+	 */
+	if (reset)
+		intel_gt_handle_error(gt, reset, 0,
+				      "context closure in %s", ctx->name);
+}
+
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -400,9 +473,47 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
+
+	/*
+	 * If the user has disabled hangchecking, we cannot be sure that
+	 * the batches will ever complete after the context is closed, which
+	 * would keep the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 */
+	if (!i915_gem_context_is_persistent(ctx) ||
+	    !i915_modparams.enable_hangcheck)
+		kill_context(ctx);
+
 	i915_gem_context_put(ctx);
 }
 
+static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
+{
+	if (i915_gem_context_is_persistent(ctx) == state)
+		return 0;
+
+	if (state) {
+		/*
+		 * Only contexts that are short-lived [that will expire or be
+		 * reset] are allowed to survive past termination. We require
+		 * hangcheck to ensure that the persistent requests are healthy.
+		 */
+		if (!i915_modparams.enable_hangcheck)
+			return -EINVAL;
+
+		i915_gem_context_set_persistence(ctx);
+	} else {
+		/* To cancel a context we use "preempt-to-idle" */
+		if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+			return -ENODEV;
+
+		i915_gem_context_clear_persistence(ctx);
+	}
+
+	return 0;
+}
+
 static struct i915_gem_context *
 __create_context(struct drm_i915_private *i915)
 {
@@ -439,6 +550,7 @@ __create_context(struct drm_i915_private *i915)
 
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
+	__context_set_persistence(ctx, true /* cgroup hook? */);
 
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
@@ -597,6 +709,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -1724,6 +1837,16 @@ get_engines(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int
+set_persistence(struct i915_gem_context *ctx,
+		const struct drm_i915_gem_context_param *args)
+{
+	if (args->size)
+		return -EINVAL;
+
+	return __context_set_persistence(ctx, args->value);
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1801,6 +1924,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		ret = set_persistence(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2250,6 +2377,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		args->size = 0;
+		args->value = i915_gem_context_is_persistent(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..e0f5b6c6a331 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -74,6 +74,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..daf1ea5075a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -137,6 +137,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
 #define UCONTEXT_RECOVERABLE		3
+#define UCONTEXT_PERSISTENCE		4
 
 	/**
 	 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..0d1a275ec316 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	i915_gem_context_set_persistence(ctx);
+
 	mutex_init(&ctx->engines_mutex);
 	e = default_engines(ctx);
 	if (IS_ERR(e))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..2fc413f9d506
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+	i915_request_add_active_barriers(rq);
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (!intel_engine_has_preemption(engine))
+		return -ENODEV;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	if (mutex_lock_interruptible(&ce->timeline->mutex))
+		goto out_rpm;
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unlock;
+	}
+
+	rq->flags |= I915_REQUEST_SENTINEL;
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+out_unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+	intel_engine_pm_put(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..b950451b5998
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8e5e513eddc9..86c0b2280b7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
 	/* Release our exclusive hold on the engine */
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 21037a2e2038..ae8bb3cb627e 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -39,6 +39,7 @@ enum {
  * active request.
  */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..eb9e704d717a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1565,6 +1565,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.23.0


* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 13:52   ` Bloomfield, Jon
  2019-10-02 14:05     ` Chris Wilson
@ 2019-10-02 14:23     ` Chris Wilson
  2019-10-02 14:40       ` Bloomfield, Jon
  1 sibling, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:23 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-10-02 14:52:32)
> 
> 
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: Wednesday, October 2, 2019 4:20 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> > <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> > Subject: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> > 
> > Normally, we rely on our hangcheck to prevent persistent batches from
> > hogging the GPU. However, if the user disables hangcheck, this mechanism
> > breaks down. Despite our insistence that this is unsafe, the users are
> > equally insistent that they want to use endless batches and will disable
> > the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> > with a softer mechanism, that sends a pulse down the engine to check if
> > it is well. We can use the same preemptive pulse to flush an active
> > persistent context off the GPU upon context close, preventing resources
> > being lost and unkillable requests remaining on the GPU after process
> > termination. To avoid changing the ABI and accidentally breaking
> > existing userspace, we make the persistence of a context explicit and
> > enable it by default (matching current ABI). Userspace can opt out of
> > persistent mode (forcing requests to be cancelled when the context is
> > closed by process termination or explicitly) by a context parameter. To
> > facilitate existing use-cases of disabling hangcheck, if the modparam is
> > disabled (i915.enable_hangcheck=0), we disable persistence mode by
> > default.  (Note, one of the outcomes for supporting endless mode will be
> > the removal of hangchecking, at which point opting into persistent mode
> > will be mandatory, or maybe the default perhaps controlled by cgroups.)
> > 
> > Testcase: igt/gem_ctx_persistence
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> > Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 |   1 +
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
> >  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
> >  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
> >  .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
> >  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
> >  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
> >  include/uapi/drm/i915_drm.h                   |  15 +++
> >  10 files changed, 228 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 1f87c70a2842..561fa2bb3006 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -76,6 +76,7 @@ gt-y += \
> >       gt/intel_breadcrumbs.o \
> >       gt/intel_context.o \
> >       gt/intel_engine_cs.o \
> > +     gt/intel_engine_heartbeat.o \
> >       gt/intel_engine_pm.o \
> >       gt/intel_engine_pool.o \
> >       gt/intel_engine_sysfs.o \
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 0ab416887fc2..e832238a72e5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> 
> <SNIP>
> 
> > +static int
> > +set_persistence(struct i915_gem_context *ctx,
> > +             const struct drm_i915_gem_context_param *args)
> > +{
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     if (args->value) {
> > +             i915_gem_context_set_persistence(ctx);
> > +             return 0;
> > +     }
> > +
> > +     /* To cancel a context we use "preempt-to-idle" */
> > +     if (!(ctx->i915->caps.scheduler &
> > I915_SCHEDULER_CAP_PREEMPTION))
> > +             return -ENODEV;
> > +
> > +     i915_gem_context_clear_persistence(ctx);
> > +     return 0;
> > +}
> 
> Hmmn. Given that disabling hangcheck is an explicit operation, and we
> already change the default setting, can't we make it a hard requirement
> that persistence requires hangcheck? You should not really be able to
> opt back in to persistence if hangcheck is disabled. In fact you could
> just test for hangcheck when deciding whether to kill the context, and
> force-kill if it is off - that way if hangcheck is disabled after a
> context starts it will get cleaned up.

Just great, now I've got to update the igt to treat i915.enable_hangcheck
as API!
-Chris
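
For illustration, opting out from userspace is then a single SETPARAM
call -- a minimal sketch, assuming the I915_CONTEXT_PARAM_PERSISTENCE
name from the uapi hunk of this patch (not quoted above); the helper
itself is hypothetical:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: opt a context out of persistence so that its
 * outstanding requests are cancelled when the context is closed. */
static int context_clear_persistence(int i915_fd, uint32_t ctx_id)
{
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PERSISTENCE,
		.value = 0, /* 0 = cancel on close, 1 = legacy persistence */
	};

	/* Fails with -ENODEV if the engines cannot preempt-to-idle. */
	return ioctl(i915_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}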

* Re: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
  2019-10-02 13:55   ` Bloomfield, Jon
@ 2019-10-02 14:28     ` Chris Wilson
  0 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:28 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-10-02 14:55:32)
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: Wednesday, October 2, 2019 4:20 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Ursulin, Tvrtko <tvrtko.ursulin@intel.com>;
> > Bloomfield, Jon <jon.bloomfield@intel.com>
> > Subject: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
> > 
> > Replace sampling the engine state every so often with a periodic
> > heartbeat request to measure the health of an engine. This is coupled
> > with the forced-preemption to allow long running requests to survive so
> > long as they do not block other users.
> > 
> > The heartbeat interval can be adjusted per-engine using,
> > 
> >       /sys/class/drm/card0/engine/*/heartbeat_interval_ms
> 
> Not true for discrete :-)
> 
> Perhaps: /sys/class/drm/card<n>/engine/*/heartbeat_interval_ms
> And again in the code below.

I left the extrapolation to the user -- I thought it was clear enough
without saying cardN, card<N>, or card?. Though since it's already using
globbing in one location, perhaps card? is the better fit.
-Chris
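
For readers following along, the heartbeat amounts to a periodic,
self-rearming worker per engine, roughly as below. This is a simplified
sketch only; the engine->heartbeat.work and
engine->props.heartbeat_interval_ms names are assumptions matching the
sysfs interface above, not code quoted from the patch:

/* Simplified sketch of a per-engine heartbeat worker. */
static void heartbeat(struct work_struct *wrk)
{
	struct intel_engine_cs *engine =
		container_of(wrk, typeof(*engine), heartbeat.work.work);
	struct i915_request *rq;

	/* Send an empty pulse down the engine on its kernel context. */
	rq = i915_request_create(engine->kernel_context);
	if (!IS_ERR(rq))
		i915_request_add(rq);

	/*
	 * If the previous pulse has still not completed by the time the
	 * next one fires, its priority is bumped; if even a maximum
	 * priority pulse cannot preempt the running context, the engine
	 * is deemed hung and reset.
	 */
	schedule_delayed_work(&engine->heartbeat.work,
			      msecs_to_jiffies(engine->props.heartbeat_interval_ms));
}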

* Re: [PATCH v2] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
@ 2019-10-02 14:36     ` Bloomfield, Jon
  0 siblings, 0 replies; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 14:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 7:23 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH v2] drm/i915: Cancel non-persistent contexts on close
> 
> Normally, we rely on our hangcheck to prevent persistent batches from
> hogging the GPU. However, if the user disables hangcheck, this mechanism
> breaks down. Despite our insistence that this is unsafe, the users are
> equally insistent that they want to use endless batches and will disable
> the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> with a softer mechanism, that sends a pulse down the engine to check if
> it is well. We can use the same preemptive pulse to flush an active
> persistent context off the GPU upon context close, preventing resources
> being lost and unkillable requests remaining on the GPU after process
> termination. To avoid changing the ABI and accidentally breaking
> existing userspace, we make the persistence of a context explicit and
> enable it by default (matching current ABI). Userspace can opt out of
> persistent mode (forcing requests to be cancelled when the context is
> closed by process termination or explicitly) by a context parameter. To
> facilitate existing use-cases of disabling hangcheck, if the modparam is
> disabled (i915.enable_hangcheck=0), we disable persistence mode by
> default.  (Note, one of the outcomes for supporting endless mode will be
> the removal of hangchecking, at which point opting into persistent mode
> will be mandatory, or perhaps the default, controlled by cgroups.)
> 
> v2: Check for hangchecking at context termination, so that we are not
> left with undying contexts from a craft user.

s/craft/crafty/ unless we only care about sailors.

Otherwise:
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>

* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 14:23     ` Chris Wilson
@ 2019-10-02 14:40       ` Bloomfield, Jon
  0 siblings, 0 replies; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 14:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 7:24 AM
> To: Bloomfield, Jon <jon.bloomfield@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>
> Subject: RE: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> 
> Quoting Bloomfield, Jon (2019-10-02 14:52:32)
> >
> >
> >
> > Hmmn. Given that disabling hangcheck is an explicit operation, and we
> already change the default setting, can't we make it a hard requirement that
> persistence requires hangcheck? You should not really be able to opt back in to
> persistence if hangcheck is disabled. In fact you could just test for hangcheck
> when deciding whether to kill the context, and force-kill if it is off - that way if
> hangcheck is disabled after a context starts it will get cleaned up.
> 
> Just great, now I've got to update the igt to treat i915.enable_hangcheck
> as API!
> -Chris
Don't blame me ;-)
I'm in damage limitation mode. I'd prefer we didn't have persistence at all.
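
The check that came out of this exchange (see the v2 posted earlier in
the thread) is small -- roughly the following, where kill_context()
stands in for the preempt-to-idle pulse machinery; a sketch, not the
patch itself:

static void context_close(struct i915_gem_context *ctx)
{
	/*
	 * If the user has disabled hangchecking, nothing will ever clean
	 * up a hostile context, so treat it as non-persistent and cancel
	 * its outstanding requests on close instead of leaving them
	 * undying on the GPU.
	 */
	if (!i915_gem_context_is_persistent(ctx) ||
	    !i915_modparams.enable_hangcheck)
		kill_context(ctx);

	i915_gem_context_put(ctx);
}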

* Re: [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate
  2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
@ 2019-10-02 15:47   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> Since we cannot allocate underneath the vm->mutex (it is used in the
> direct-reclaim paths), we need to shift the allocations off into a
> mutexless worker with fence recursion prevention. To know when we need
> this protection, we mark up the address spaces that do allocate before
> insertion. In the future, we may wish to extend the async bind scheme to
> more than just allocations.
> 
> v2: s/vm->bind_alloc/vm->bind_async_flags/
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
>   drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++
>   2 files changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ad9eb2d68f3f..8eba63ecdb03 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1502,6 +1502,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>   			goto err_free_pd;
>   	}
>   
> +	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>   	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>   	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>   	ppgtt->vm.clear_range = gen8_ppgtt_clear;
> @@ -1950,6 +1951,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>   	ppgtt_init(&ppgtt->base, &i915->gt);
>   	ppgtt->base.vm.top = 1;
>   
> +	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>   	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>   	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
>   	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
> @@ -2581,6 +2583,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>   		goto err_ppgtt;
>   
>   	ggtt->alias = ppgtt;
> +	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
>   
>   	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
>   	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index bbdc735466c1..3502b9c85a8e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -305,6 +305,8 @@ struct i915_address_space {
>   	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
>   	u64 reserved;		/* size addr space reserved */
>   
> +	unsigned int bind_async_flags;
> +
>   	bool closed;
>   
>   	struct mutex mutex; /* protects vma and our lists */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
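
The consumer of the new flag lives in the vma code rather than in this
hunk; the idea reduces to the following sketch, where queue_async_bind()
is a hypothetical stand-in for the mutexless worker described in the
commit message:

static int bind_vma(struct i915_vma *vma, u32 bind_flags)
{
	/*
	 * Binding may need to allocate page directories, which we cannot
	 * do under vm->mutex (it is used in the direct-reclaim paths),
	 * so shift such binds off to the async worker.
	 */
	if (vma->vm->bind_async_flags & bind_flags)
		return queue_async_bind(vma, bind_flags);

	return vma->vm->vma_ops.bind_vma(vma, vma->obj->cache_level,
					 bind_flags);
}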

* Re: [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex
  2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
@ 2019-10-02 15:49   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> Forgo the struct_mutex serialisation for i915_active, and interpose its
> own mutex handling for active/retire.
> 
> This is a multi-layered sleight-of-hand. First, we had to ensure that no
> active/retire callbacks accidentally inverted the mutex ordering rules,
> nor assumed that they were themselves serialised by struct_mutex. More
> challenging, though, is the rule for updating elements of the active
> rbtree. Instead of the whole i915_active now being serialised by
> struct_mutex, allocations/rotations of the tree are serialised by the
> i915_active.mutex and individual nodes are serialised by the caller
> using the i915_timeline.mutex (we need to use nested spinlocks to
> interact with the dma_fence callback lists).
> 
> The pain point here is that instead of a single mutex around execbuf, we
> now have to take a mutex per active tracker (one for each vma, context,
> etc) and a couple of spinlocks for each fence update. The improvement in
> fine-grained locking, allowing for multiple concurrent clients
> (eventually!), should be worth it in typical loads.
> 
> v2: Add some comments that barely elucidate anything :(

Here's one r-b which is barely worth anything as well: ;)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
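
In API terms, a typical user after this patch looks roughly like the
condensed sketch below, drawn from the conversions in the diff (init
shown inline for brevity; it is normally done once at creation):

static int track_request(struct i915_active *ref, struct i915_request *rq)
{
	int err;

	/* No struct drm_i915_private argument any more. */
	i915_active_init(ref, NULL /* active */, NULL /* retire */);

	/* First use takes ref->mutex; subsequent uses are atomic. */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	/* Record rq as the last activity, under its timeline mutex. */
	err = i915_active_add_request(ref, rq);

	i915_active_release(ref);
	return err;
}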

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../gpu/drm/i915/display/intel_frontbuffer.c  |   2 +-
>   drivers/gpu/drm/i915/display/intel_overlay.c  |   3 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   9 +-
>   drivers/gpu/drm/i915/gt/intel_context.c       |   4 +-
>   drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   2 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c         |  10 +-
>   drivers/gpu/drm/i915/gt/intel_timeline.c      |   7 +-
>   .../gpu/drm/i915/gt/intel_timeline_types.h    |   9 +-
>   drivers/gpu/drm/i915/gt/selftest_context.c    |  16 +-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |  10 +-
>   .../gpu/drm/i915/gt/selftests/mock_timeline.c |   2 +-
>   drivers/gpu/drm/i915/gvt/scheduler.c          |   3 -
>   drivers/gpu/drm/i915/i915_active.c            | 322 +++++++++---------
>   drivers/gpu/drm/i915/i915_active.h            | 319 ++++-------------
>   drivers/gpu/drm/i915/i915_active_types.h      |  23 +-
>   drivers/gpu/drm/i915/i915_gem.c               |  42 ++-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c         |   4 +-
>   drivers/gpu/drm/i915/i915_request.c           |  38 +--
>   drivers/gpu/drm/i915/i915_request.h           |   1 -
>   drivers/gpu/drm/i915/i915_vma.c               |   4 +-
>   drivers/gpu/drm/i915/selftests/i915_active.c  |  32 +-
>   24 files changed, 298 insertions(+), 572 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> index 6428b8dd70d3..84b164f31895 100644
> --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> @@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
>   	front->obj = obj;
>   	kref_init(&front->ref);
>   	atomic_set(&front->bits, 0);
> -	i915_active_init(i915, &front->write,
> +	i915_active_init(&front->write,
>   			 frontbuffer_active,
>   			 i915_active_may_sleep(frontbuffer_retire));
>   
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index 3f4ac1ee7668..e12e1a753af0 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
>   	overlay->contrast = 75;
>   	overlay->saturation = 146;
>   
> -	i915_active_init(dev_priv,
> -			 &overlay->last_flip,
> +	i915_active_init(&overlay->last_flip,
>   			 NULL, intel_overlay_last_flip_retire);
>   
>   	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 4cd7d2ecf1d5..9d85aab68d34 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   				void (*task)(void *data),
>   				void *data)
>   {
> -	struct drm_i915_private *i915 = ctx->i915;
>   	struct context_barrier_task *cb;
>   	struct i915_gem_engines_iter it;
>   	struct intel_context *ce;
>   	int err = 0;
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
>   	GEM_BUG_ON(!task);
>   
>   	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
>   	if (!cb)
>   		return -ENOMEM;
>   
> -	i915_active_init(i915, &cb->base, NULL, cb_retire);
> +	i915_active_init(&cb->base, NULL, cb_retire);
>   	err = i915_active_acquire(&cb->base);
>   	if (err) {
>   		kfree(cb);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index e1aab2fd1cd9..c00b4f077f9e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -8,6 +8,7 @@
>   #define __I915_GEM_OBJECT_TYPES_H__
>   
>   #include <drm/drm_gem.h>
> +#include <uapi/drm/i915_drm.h>
>   
>   #include "i915_active.h"
>   #include "i915_selftest.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index cca192ecff8b..0a4115c6c275 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
>   	struct llist_node *node, *next;
>   
>   	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
> -		struct i915_active_request *active =
> +		struct dma_fence_cb *cb =
>   			container_of((struct list_head *)node,
> -				     typeof(*active), link);
> +				     typeof(*cb), node);
>   
> -		INIT_LIST_HEAD(&active->link);
> -		RCU_INIT_POINTER(active->request, NULL);
> -
> -		active->retire(active, NULL);
> +		cb->func(NULL, cb);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 06fabdf205cf..35a40c2820a2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,
>   
>   	mutex_init(&ce->pin_mutex);
>   
> -	i915_active_init(ctx->i915, &ce->active,
> +	i915_active_init(&ce->active,
>   			 __intel_context_active, __intel_context_retire);
>   }
>   
> @@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>   			return err;
>   
>   		/* Queue this switch after current activity by this context. */
> -		err = i915_active_request_set(&tl->last_request, rq);
> +		err = i915_active_fence_set(&tl->last_request, rq);
>   		mutex_unlock(&tl->mutex);
>   		if (err)
>   			return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> index 81fab101fdb4..3cdbd5f8b5be 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> @@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
>   		return ERR_PTR(-ENOMEM);
>   
>   	node->pool = pool;
> -	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
> +	i915_active_init(&node->active, pool_active, pool_retire);
>   
>   	obj = i915_gem_object_create_internal(engine->i915, sz);
>   	if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index e189897e8797..055496f0825f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
>   	 */
>   	spin_lock_irqsave(&timelines->lock, flags);
>   	list_for_each_entry(tl, &timelines->active_list, link) {
> -		struct i915_request *rq;
> +		struct dma_fence *fence;
>   
> -		rq = i915_active_request_get_unlocked(&tl->last_request);
> -		if (!rq)
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (!fence)
>   			continue;
>   
>   		spin_unlock_irqrestore(&timelines->lock, flags);
> @@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
>   		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
>   		 * in the worst case.
>   		 */
> -		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
> -		i915_request_put(rq);
> +		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
> +		dma_fence_put(fence);
>   
>   		/* Restart iteration after droping lock */
>   		spin_lock_irqsave(&timelines->lock, flags);
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 653f60e78392..0f959694303c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
>   	cl->hwsp = hwsp;
>   	cl->vaddr = page_pack_bits(vaddr, cacheline);
>   
> -	i915_active_init(hwsp->gt->i915, &cl->active,
> -			 __cacheline_active, __cacheline_retire);
> +	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
>   
>   	return cl;
>   }
> @@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
>   
>   	mutex_init(&timeline->mutex);
>   
> -	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
> +	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
>   	INIT_LIST_HEAD(&timeline->requests);
>   
>   	i915_syncmap_init(&timeline->sync);
> @@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
>   	 * free it after the current request is retired, which ensures that
>   	 * all writes into the cacheline from previous requests are complete.
>   	 */
> -	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
> +	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
>   	if (err)
>   		goto err_cacheline;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index c668c4c50e75..98d9ee166379 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -58,12 +58,13 @@ struct intel_timeline {
>   	 */
>   	struct list_head requests;
>   
> -	/* Contains an RCU guarded pointer to the last request. No reference is
> +	/*
> +	 * Contains an RCU guarded pointer to the last request. No reference is
>   	 * held to the request, users must carefully acquire a reference to
> -	 * the request using i915_active_request_get_request_rcu(), or hold the
> -	 * struct_mutex.
> +	 * the request using i915_active_fence_get(), or manage the RCU
> +	 * protection themselves (cf the i915_active_fence API).
>   	 */
> -	struct i915_active_request last_request;
> +	struct i915_active_fence last_request;
>   
>   	/**
>   	 * We track the most recent seqno that we wait on in every context so
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> index 4ce1e25433d2..e6bcbe7ab5e1 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_context.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce)
>   
>   	mutex_lock(&tl->mutex);
>   	do {
> -		struct i915_request *rq;
> +		struct dma_fence *fence;
>   		long timeout;
>   
> -		rcu_read_lock();
> -		rq = rcu_dereference(tl->last_request.request);
> -		if (rq)
> -			rq = i915_request_get_rcu(rq);
> -		rcu_read_unlock();
> -		if (!rq)
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (!fence)
>   			break;
>   
> -		timeout = i915_request_wait(rq, 0, HZ / 10);
> +		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
>   		if (timeout < 0)
>   			err = timeout;
>   		else
> -			i915_request_retire_upto(rq);
> +			i915_request_retire_upto(to_request(fence));
>   
> -		i915_request_put(rq);
> +		dma_fence_put(fence);
>   	} while (!err);
>   	mutex_unlock(&tl->mutex);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 2da4fa5149cb..01d5a26b9aa7 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1176,9 +1176,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
>   	if (!rq)
>   		return NULL;
>   
> -	INIT_LIST_HEAD(&rq->active_list);
>   	rq->engine = engine;
>   
> +	spin_lock_init(&rq->lock);
> +	INIT_LIST_HEAD(&rq->fence.cb_list);
> +	rq->fence.lock = &rq->lock;
> +	rq->fence.ops = &i915_fence_ops;
> +
>   	i915_sched_node_init(&rq->sched);
>   
>   	/* mark this request as permanently incomplete */
> @@ -1271,8 +1275,8 @@ static int live_suppress_wait_preempt(void *arg)
>   				}
>   
>   				/* Disable NEWCLIENT promotion */
> -				__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
> -							  dummy);
> +				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
> +							&dummy->fence);
>   				i915_request_add(rq[i]);
>   			}
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> index 598170efcaf6..2a77c051f36a 100644
> --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
>   
>   	mutex_init(&timeline->mutex);
>   
> -	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
> +	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
>   	INIT_LIST_HEAD(&timeline->requests);
>   
>   	i915_syncmap_init(&timeline->sync);
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 6c79d16b381e..03f567084548 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
>   {
>   	struct intel_vgpu *vgpu = workload->vgpu;
>   	struct intel_vgpu_submission *s = &vgpu->submission;
> -	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
>   	struct i915_request *rq;
>   
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
>   	if (workload->req)
>   		return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 7ca066688b98..023652ded4be 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -12,8 +12,6 @@
>   #include "i915_active.h"
>   #include "i915_globals.h"
>   
> -#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
> -
>   /*
>    * Active refs memory management
>    *
> @@ -27,35 +25,35 @@ static struct i915_global_active {
>   } global;
>   
>   struct active_node {
> -	struct i915_active_request base;
> +	struct i915_active_fence base;
>   	struct i915_active *ref;
>   	struct rb_node node;
>   	u64 timeline;
>   };
>   
>   static inline struct active_node *
> -node_from_active(struct i915_active_request *active)
> +node_from_active(struct i915_active_fence *active)
>   {
>   	return container_of(active, struct active_node, base);
>   }
>   
>   #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
>   
> -static inline bool is_barrier(const struct i915_active_request *active)
> +static inline bool is_barrier(const struct i915_active_fence *active)
>   {
> -	return IS_ERR(rcu_access_pointer(active->request));
> +	return IS_ERR(rcu_access_pointer(active->fence));
>   }
>   
>   static inline struct llist_node *barrier_to_ll(struct active_node *node)
>   {
>   	GEM_BUG_ON(!is_barrier(&node->base));
> -	return (struct llist_node *)&node->base.link;
> +	return (struct llist_node *)&node->base.cb.node;
>   }
>   
>   static inline struct intel_engine_cs *
>   __barrier_to_engine(struct active_node *node)
>   {
> -	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
> +	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
>   }
>   
>   static inline struct intel_engine_cs *
> @@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node)
>   static inline struct active_node *barrier_from_ll(struct llist_node *x)
>   {
>   	return container_of((struct list_head *)x,
> -			    struct active_node, base.link);
> +			    struct active_node, base.cb.node);
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
> @@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref)
>   	if (!retire)
>   		return;
>   
> -	GEM_BUG_ON(rcu_access_pointer(ref->excl));
> +	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
>   	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
> -		GEM_BUG_ON(i915_active_request_isset(&it->base));
> +		GEM_BUG_ON(i915_active_fence_isset(&it->base));
>   		kmem_cache_free(global.slab_cache, it);
>   	}
>   
>   	/* After the final retire, the entire struct may be freed */
>   	if (ref->retire)
>   		ref->retire(ref);
> +
> +	/* ... except if you wait on it, you must manage your own references! */
> +	wake_up_var(ref);
>   }
>   
>   static void
> @@ -189,12 +190,20 @@ active_retire(struct i915_active *ref)
>   }
>   
>   static void
> -node_retire(struct i915_active_request *base, struct i915_request *rq)
> +node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
>   {
> -	active_retire(node_from_active(base)->ref);
> +	i915_active_fence_cb(fence, cb);
> +	active_retire(container_of(cb, struct active_node, base.cb)->ref);
>   }
>   
> -static struct i915_active_request *
> +static void
> +excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
> +{
> +	i915_active_fence_cb(fence, cb);
> +	active_retire(container_of(cb, struct i915_active, excl.cb));
> +}
> +
> +static struct i915_active_fence *
>   active_instance(struct i915_active *ref, struct intel_timeline *tl)
>   {
>   	struct active_node *node, *prealloc;
> @@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
>   	}
>   
>   	node = prealloc;
> -	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
> +	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
>   	node->ref = ref;
>   	node->timeline = idx;
>   
> @@ -253,8 +262,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
>   	return &node->base;
>   }
>   
> -void __i915_active_init(struct drm_i915_private *i915,
> -			struct i915_active *ref,
> +void __i915_active_init(struct i915_active *ref,
>   			int (*active)(struct i915_active *ref),
>   			void (*retire)(struct i915_active *ref),
>   			struct lock_class_key *key)
> @@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915,
>   
>   	debug_active_init(ref);
>   
> -	ref->i915 = i915;
>   	ref->flags = 0;
>   	ref->active = active;
>   	ref->retire = ptr_unpack_bits(retire, &bits, 2);
>   	if (bits & I915_ACTIVE_MAY_SLEEP)
>   		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
>   
> -	ref->excl = NULL;
>   	ref->tree = RB_ROOT;
>   	ref->cache = NULL;
>   	init_llist_head(&ref->preallocated_barriers);
>   	atomic_set(&ref->count, 0);
>   	__mutex_init(&ref->mutex, "i915_active", key);
> +	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
>   	INIT_WORK(&ref->work, active_work);
>   }
>   
> @@ -329,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
>   
>   int i915_active_ref(struct i915_active *ref,
>   		    struct intel_timeline *tl,
> -		    struct i915_request *rq)
> +		    struct dma_fence *fence)
>   {
> -	struct i915_active_request *active;
> +	struct i915_active_fence *active;
>   	int err;
>   
>   	lockdep_assert_held(&tl->mutex);
> @@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref,
>   		 * request that we want to emit on the kernel_context.
>   		 */
>   		__active_del_barrier(ref, node_from_active(active));
> -		RCU_INIT_POINTER(active->request, NULL);
> -		INIT_LIST_HEAD(&active->link);
> -	} else {
> -		if (!i915_active_request_isset(active))
> -			atomic_inc(&ref->count);
> +		RCU_INIT_POINTER(active->fence, NULL);
> +		atomic_dec(&ref->count);
>   	}
> -	GEM_BUG_ON(!atomic_read(&ref->count));
> -	__i915_active_request_set(active, rq);
> +	if (!__i915_active_fence_set(active, fence))
> +		atomic_inc(&ref->count);
>   
>   out:
>   	i915_active_release(ref);
>   	return err;
>   }
>   
> -static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> -{
> -	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
> -
> -	RCU_INIT_POINTER(ref->excl, NULL);
> -	dma_fence_put(f);
> -
> -	active_retire(ref);
> -}
> -
>   void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
>   {
>   	/* We expect the caller to manage the exclusive timeline ordering */
>   	GEM_BUG_ON(i915_active_is_idle(ref));
>   
> -	dma_fence_get(f);
> -
> -	rcu_read_lock();
> -	if (rcu_access_pointer(ref->excl)) {
> -		struct dma_fence *old;
> -
> -		old = dma_fence_get_rcu_safe(&ref->excl);
> -		if (old) {
> -			if (dma_fence_remove_callback(old, &ref->excl_cb))
> -				atomic_dec(&ref->count);
> -			dma_fence_put(old);
> -		}
> -	}
> -	rcu_read_unlock();
> -
> -	atomic_inc(&ref->count);
> -	rcu_assign_pointer(ref->excl, f);
> +	/*
> +	 * As we don't know which mutex the caller is using, we told a small
> +	 * lie to the debug code that it is using the i915_active.mutex;
> +	 * and now we must stick to that lie.
> +	 */
> +	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
> +	if (!__i915_active_fence_set(&ref->excl, f))
> +		atomic_inc(&ref->count);
> +	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
> +}
>   
> -	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
> -		RCU_INIT_POINTER(ref->excl, NULL);
> -		atomic_dec(&ref->count);
> -		dma_fence_put(f);
> -	}
> +bool i915_active_acquire_if_busy(struct i915_active *ref)
> +{
> +	debug_active_assert(ref);
> +	return atomic_add_unless(&ref->count, 1, 0);
>   }
>   
>   int i915_active_acquire(struct i915_active *ref)
>   {
>   	int err;
>   
> -	debug_active_assert(ref);
> -	if (atomic_add_unless(&ref->count, 1, 0))
> +	if (i915_active_acquire_if_busy(ref))
>   		return 0;
>   
>   	err = mutex_lock_interruptible(&ref->mutex);
> @@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref)
>   	active_retire(ref);
>   }
>   
> -static void __active_ungrab(struct i915_active *ref)
> -{
> -	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
> -}
> -
> -bool i915_active_trygrab(struct i915_active *ref)
> +static void enable_signaling(struct i915_active_fence *active)
>   {
> -	debug_active_assert(ref);
> -
> -	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
> -		return false;
> -
> -	if (!atomic_add_unless(&ref->count, 1, 0)) {
> -		__active_ungrab(ref);
> -		return false;
> -	}
> +	struct dma_fence *fence;
>   
> -	return true;
> -}
> -
> -void i915_active_ungrab(struct i915_active *ref)
> -{
> -	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
> -
> -	active_retire(ref);
> -	__active_ungrab(ref);
> -}
> -
> -static int excl_wait(struct i915_active *ref)
> -{
> -	struct dma_fence *old;
> -	int err = 0;
> -
> -	if (!rcu_access_pointer(ref->excl))
> -		return 0;
> -
> -	rcu_read_lock();
> -	old = dma_fence_get_rcu_safe(&ref->excl);
> -	rcu_read_unlock();
> -	if (old) {
> -		err = dma_fence_wait(old, true);
> -		dma_fence_put(old);
> -	}
> +	fence = i915_active_fence_get(active);
> +	if (!fence)
> +		return;
>   
> -	return err;
> +	dma_fence_enable_sw_signaling(fence);
> +	dma_fence_put(fence);
>   }
>   
>   int i915_active_wait(struct i915_active *ref)
>   {
>   	struct active_node *it, *n;
> -	int err;
> +	int err = 0;
>   
>   	might_sleep();
> -	might_lock(&ref->mutex);
> -
> -	if (i915_active_is_idle(ref))
> -		return 0;
> -
> -	err = mutex_lock_interruptible(&ref->mutex);
> -	if (err)
> -		return err;
>   
> -	if (!atomic_add_unless(&ref->count, 1, 0)) {
> -		mutex_unlock(&ref->mutex);
> +	if (!i915_active_acquire_if_busy(ref))
>   		return 0;
> -	}
> -
> -	err = excl_wait(ref);
> -	if (err)
> -		goto out;
>   
> +	/* Flush lazy signals */
> +	enable_signaling(&ref->excl);
>   	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> -		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
> -			err = -EBUSY;
> -			break;
> -		}
> +		if (is_barrier(&it->base)) /* unconnected idle barrier */
> +			continue;
>   
> -		err = i915_active_request_retire(&it->base, BKL(ref));
> -		if (err)
> -			break;
> +		enable_signaling(&it->base);
>   	}
> +	/* Any fence added after the wait begins will not be auto-signaled */
>   
> -out:
> -	__active_retire(ref);
> +	i915_active_release(ref);
>   	if (err)
>   		return err;
>   
> -	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
> +	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
>   		return -EINTR;
>   
> -	flush_work(&ref->work);
> -	if (!i915_active_is_idle(ref))
> -		return -EBUSY;
> -
>   	return 0;
>   }
>   
> -int i915_request_await_active_request(struct i915_request *rq,
> -				      struct i915_active_request *active)
> -{
> -	struct i915_request *barrier =
> -		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
> -
> -	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
> -}
> -
>   int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
>   {
>   	int err = 0;
>   
> -	if (rcu_access_pointer(ref->excl)) {
> +	if (rcu_access_pointer(ref->excl.fence)) {
>   		struct dma_fence *fence;
>   
>   		rcu_read_lock();
> -		fence = dma_fence_get_rcu_safe(&ref->excl);
> +		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
>   		rcu_read_unlock();
>   		if (fence) {
>   			err = i915_request_await_dma_fence(rq, fence);
> @@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref)
>   
>   static inline bool is_idle_barrier(struct active_node *node, u64 idx)
>   {
> -	return node->timeline == idx && !i915_active_request_isset(&node->base);
> +	return node->timeline == idx && !i915_active_fence_isset(&node->base);
>   }
>   
>   static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
> @@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   			node->base.lock =
>   				&engine->kernel_context->timeline->mutex;
>   #endif
> -			RCU_INIT_POINTER(node->base.request, NULL);
> -			node->base.retire = node_retire;
> +			RCU_INIT_POINTER(node->base.fence, NULL);
> +			node->base.cb.func = node_retire;
>   			node->timeline = idx;
>   			node->ref = ref;
>   		}
>   
> -		if (!i915_active_request_isset(&node->base)) {
> +		if (!i915_active_fence_isset(&node->base)) {
>   			/*
>   			 * Mark this as being *our* unconnected proto-node.
>   			 *
> @@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   			 * and then we can use the rb_node and list pointers
>   			 * for our tracking of the pending barrier.
>   			 */
> -			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
> -			node->base.link.prev = (void *)engine;
> +			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
> +			node->base.cb.node.prev = (void *)engine;
>   			atomic_inc(&ref->count);
>   		}
>   
> @@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq)
>   {
>   	struct intel_engine_cs *engine = rq->engine;
>   	struct llist_node *node, *next;
> +	unsigned long flags;
>   
>   	GEM_BUG_ON(intel_engine_is_virtual(engine));
>   	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
>   
> +	node = llist_del_all(&engine->barrier_tasks);
> +	if (!node)
> +		return;
>   	/*
>   	 * Attach the list of proto-fences to the in-flight request such
>   	 * that the parent i915_active will be released when this request
>   	 * is retired.
>   	 */
> -	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
> -		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
> +	spin_lock_irqsave(&rq->lock, flags);
> +	llist_for_each_safe(node, next, node) {
> +		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
>   		smp_wmb(); /* serialise with reuse_idle_barrier */
> -		list_add_tail((struct list_head *)node, &rq->active_list);
> +		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
> +	}
> +	spin_unlock_irqrestore(&rq->lock, flags);
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +#define active_is_held(active) lockdep_is_held((active)->lock)
> +#else
> +#define active_is_held(active) true
> +#endif
> +
> +/*
> + * __i915_active_fence_set: Update the last active fence along its timeline
> + * @active: the active tracker
> + * @fence: the new fence (under construction)
> + *
> + * Records the new @fence as the last active fence along its timeline in
> + * this active tracker, moving the tracking callbacks from the previous
> + * fence onto this one. Returns the previous fence (if not already completed),
> + * which the caller must ensure is executed before the new fence. To ensure
> + * that the order of fences within the timeline of the i915_active_fence is
> + * maintained, it must be locked by the caller.
> + */
> +struct dma_fence *
> +__i915_active_fence_set(struct i915_active_fence *active,
> +			struct dma_fence *fence)
> +{
> +	struct dma_fence *prev;
> +	unsigned long flags;
> +
> +	/* NB: must be serialised by an outer timeline mutex (active->lock) */
> +	spin_lock_irqsave(fence->lock, flags);
> +	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
> +
> +	prev = rcu_dereference_protected(active->fence, active_is_held(active));
> +	if (prev) {
> +		GEM_BUG_ON(prev == fence);
> +		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
> +		__list_del_entry(&active->cb.node);
> +		spin_unlock(prev->lock); /* serialise with prev->cb_list */
> +
> +		/*
> +		 * active->fence is reset by the callback from inside
> +		 * interrupt context. We need to serialise our list
> +		 * manipulation with the fence->lock to prevent the prev
> +		 * being lost inside an interrupt (it can't be replaced as
> +		 * no other caller is allowed to enter __i915_active_fence_set
> +		 * as we hold the timeline lock). After serialising with
> +		 * the callback, we need to double check which ran first,
> +		 * our list_del() [decoupling prev from the callback] or
> +		 * the callback...
> +		 */
> +		prev = rcu_access_pointer(active->fence);
>   	}
> +
> +	rcu_assign_pointer(active->fence, fence);
> +	list_add_tail(&active->cb.node, &fence->cb_list);
> +
> +	spin_unlock_irqrestore(fence->lock, flags);
> +
> +	return prev;
>   }
>   
> -int i915_active_request_set(struct i915_active_request *active,
> -			    struct i915_request *rq)
> +int i915_active_fence_set(struct i915_active_fence *active,
> +			  struct i915_request *rq)
>   {
> -	int err;
> +	struct dma_fence *fence;
> +	int err = 0;
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   	lockdep_assert_held(active->lock);
>   #endif
>   
> -	/* Must maintain ordering wrt previous active requests */
> -	err = i915_request_await_active_request(rq, active);
> -	if (err)
> -		return err;
> +	/* Must maintain timeline ordering wrt previous active requests */
> +	rcu_read_lock();
> +	fence = __i915_active_fence_set(active, &rq->fence);
> +	if (fence) /* but the previous fence may not belong to that timeline! */
> +		fence = dma_fence_get_rcu(fence);
> +	rcu_read_unlock();
> +	if (fence) {
> +		err = i915_request_await_dma_fence(rq, fence);
> +		dma_fence_put(fence);
> +	}
>   
> -	__i915_active_request_set(active, rq);
> -	return 0;
> +	return err;
>   }
>   
> -void i915_active_retire_noop(struct i915_active_request *active,
> -			     struct i915_request *request)
> +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
>   {
> -	/* Space left intentionally blank */
> +	i915_active_fence_cb(fence, cb);
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 90034f61b7c2..4f52fe6146d2 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -12,6 +12,10 @@
>   #include "i915_active_types.h"
>   #include "i915_request.h"
>   
> +struct i915_request;
> +struct intel_engine_cs;
> +struct intel_timeline;
> +
>   /*
> >    * We treat requests as fences. This is not to be confused with our
>    * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
> @@ -28,308 +32,108 @@
>    * write access so that we can perform concurrent read operations between
>    * the CPU and GPU engines, as well as waiting for all rendering to
>    * complete, or waiting for the last GPU user of a "fence register". The
> - * object then embeds a #i915_active_request to track the most recent (in
> + * object then embeds a #i915_active_fence to track the most recent (in
>    * retirement order) request relevant for the desired mode of access.
> - * The #i915_active_request is updated with i915_active_request_set() to
> + * The #i915_active_fence is updated with i915_active_fence_set() to
>    * track the most recent fence request, typically this is done as part of
>    * i915_vma_move_to_active().
>    *
> - * When the #i915_active_request completes (is retired), it will
> + * When the #i915_active_fence completes (is retired), it will
>    * signal its completion to the owner through a callback as well as mark
> - * itself as idle (i915_active_request.request == NULL). The owner
> > + * itself as idle (i915_active_fence.fence == NULL). The owner
>    * can then perform any action, such as delayed freeing of an active
>    * resource including itself.
>    */
>   
> -void i915_active_retire_noop(struct i915_active_request *active,
> -			     struct i915_request *request);
> +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
>   
>   /**
> - * i915_active_request_init - prepares the activity tracker for use
> + * __i915_active_fence_init - prepares the activity tracker for use
>    * @active - the active tracker
> - * @rq - initial request to track, can be NULL
> + * @fence - initial fence to track, can be NULL
>    * @func - a callback when then the tracker is retired (becomes idle),
>    *         can be NULL
>    *
> - * i915_active_request_init() prepares the embedded @active struct for use as
> - * an activity tracker, that is for tracking the last known active request
> - * associated with it. When the last request becomes idle, when it is retired
> > + * __i915_active_fence_init() prepares the embedded @active struct for use as
> + * an activity tracker, that is for tracking the last known active fence
> + * associated with it. When the last fence becomes idle, when it is retired
>    * after completion, the optional callback @func is invoked.
>    */
>   static inline void
> -i915_active_request_init(struct i915_active_request *active,
> +__i915_active_fence_init(struct i915_active_fence *active,
>   			 struct mutex *lock,
> -			 struct i915_request *rq,
> -			 i915_active_retire_fn retire)
> +			 void *fence,
> +			 dma_fence_func_t fn)
>   {
> -	RCU_INIT_POINTER(active->request, rq);
> -	INIT_LIST_HEAD(&active->link);
> -	active->retire = retire ?: i915_active_retire_noop;
> +	RCU_INIT_POINTER(active->fence, fence);
> +	active->cb.func = fn ?: i915_active_noop;
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   	active->lock = lock;
>   #endif
>   }
>   
> -#define INIT_ACTIVE_REQUEST(name, lock) \
> -	i915_active_request_init((name), (lock), NULL, NULL)
> -
> -/**
> - * i915_active_request_set - updates the tracker to watch the current request
> - * @active - the active tracker
> - * @request - the request to watch
> - *
> - * __i915_active_request_set() watches the given @request for completion. Whilst
> - * that @request is busy, the @active reports busy. When that @request is
> - * retired, the @active tracker is updated to report idle.
> - */
> -static inline void
> -__i915_active_request_set(struct i915_active_request *active,
> -			  struct i915_request *request)
> -{
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> -	lockdep_assert_held(active->lock);
> -#endif
> -	list_move(&active->link, &request->active_list);
> -	rcu_assign_pointer(active->request, request);
> -}
> +#define INIT_ACTIVE_FENCE(A, LOCK) \
> +	__i915_active_fence_init((A), (LOCK), NULL, NULL)
>   
> -int __must_check
> -i915_active_request_set(struct i915_active_request *active,
> -			struct i915_request *rq);
> +struct dma_fence *
> +__i915_active_fence_set(struct i915_active_fence *active,
> +			struct dma_fence *fence);
>   
>   /**
> - * i915_active_request_raw - return the active request
> + * i915_active_fence_set - updates the tracker to watch the current fence
>    * @active - the active tracker
> + * @rq - the request to watch
>    *
> - * i915_active_request_raw() returns the current request being tracked, or NULL.
> - * It does not obtain a reference on the request for the caller, so the caller
> - * must hold struct_mutex.
> + * i915_active_fence_set() watches the given @rq for completion. While
> + * that @rq is busy, the @active reports busy. When that @rq is signaled
> + * (or else retired) the @active tracker is updated to report idle.
>    */
> -static inline struct i915_request *
> -i915_active_request_raw(const struct i915_active_request *active,
> -			struct mutex *mutex)
> -{
> -	return rcu_dereference_protected(active->request,
> -					 lockdep_is_held(mutex));
> -}
> -
> -/**
> - * i915_active_request_peek - report the active request being monitored
> - * @active - the active tracker
> - *
> - * i915_active_request_peek() returns the current request being tracked if
> - * still active, or NULL. It does not obtain a reference on the request
> - * for the caller, so the caller must hold struct_mutex.
> - */
> -static inline struct i915_request *
> -i915_active_request_peek(const struct i915_active_request *active,
> -			 struct mutex *mutex)
> -{
> -	struct i915_request *request;
> -
> -	request = i915_active_request_raw(active, mutex);
> -	if (!request || i915_request_completed(request))
> -		return NULL;
> -
> -	return request;
> -}
> -
> -/**
> - * i915_active_request_get - return a reference to the active request
> - * @active - the active tracker
> - *
> - * i915_active_request_get() returns a reference to the active request, or NULL
> - * if the active tracker is idle. The caller must hold struct_mutex.
> - */
> -static inline struct i915_request *
> -i915_active_request_get(const struct i915_active_request *active,
> -			struct mutex *mutex)
> -{
> -	return i915_request_get(i915_active_request_peek(active, mutex));
> -}
> -
> -/**
> - * __i915_active_request_get_rcu - return a reference to the active request
> - * @active - the active tracker
> - *
> - * __i915_active_request_get() returns a reference to the active request,
> - * or NULL if the active tracker is idle. The caller must hold the RCU read
> - * lock, but the returned pointer is safe to use outside of RCU.
> - */
> -static inline struct i915_request *
> -__i915_active_request_get_rcu(const struct i915_active_request *active)
> -{
> -	/*
> -	 * Performing a lockless retrieval of the active request is super
> -	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
> -	 * slab of request objects will not be freed whilst we hold the
> -	 * RCU read lock. It does not guarantee that the request itself
> -	 * will not be freed and then *reused*. Viz,
> -	 *
> -	 * Thread A			Thread B
> -	 *
> -	 * rq = active.request
> -	 *				retire(rq) -> free(rq);
> -	 *				(rq is now first on the slab freelist)
> -	 *				active.request = NULL
> -	 *
> -	 *				rq = new submission on a new object
> -	 * ref(rq)
> -	 *
> -	 * To prevent the request from being reused whilst the caller
> -	 * uses it, we take a reference like normal. Whilst acquiring
> -	 * the reference we check that it is not in a destroyed state
> -	 * (refcnt == 0). That prevents the request being reallocated
> -	 * whilst the caller holds on to it. To check that the request
> -	 * was not reallocated as we acquired the reference we have to
> -	 * check that our request remains the active request across
> -	 * the lookup, in the same manner as a seqlock. The visibility
> -	 * of the pointer versus the reference counting is controlled
> -	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
> -	 *
> -	 * In the middle of all that, we inspect whether the request is
> -	 * complete. Retiring is lazy so the request may be completed long
> -	 * before the active tracker is updated. Querying whether the
> -	 * request is complete is far cheaper (as it involves no locked
> -	 * instructions setting cachelines to exclusive) than acquiring
> -	 * the reference, so we do it first. The RCU read lock ensures the
> -	 * pointer dereference is valid, but does not ensure that the
> -	 * seqno nor HWS is the right one! However, if the request was
> -	 * reallocated, that means the active tracker's request was complete.
> -	 * If the new request is also complete, then both are and we can
> -	 * just report the active tracker is idle. If the new request is
> -	 * incomplete, then we acquire a reference on it and check that
> -	 * it remained the active request.
> -	 *
> -	 * It is then imperative that we do not zero the request on
> -	 * reallocation, so that we can chase the dangling pointers!
> -	 * See i915_request_alloc().
> -	 */
> -	do {
> -		struct i915_request *request;
> -
> -		request = rcu_dereference(active->request);
> -		if (!request || i915_request_completed(request))
> -			return NULL;
> -
> -		/*
> -		 * An especially silly compiler could decide to recompute the
> -		 * result of i915_request_completed, more specifically
> -		 * re-emit the load for request->fence.seqno. A race would catch
> -		 * a later seqno value, which could flip the result from true to
> -		 * false. Which means part of the instructions below might not
> -		 * be executed, while later on instructions are executed. Due to
> -		 * barriers within the refcounting the inconsistency can't reach
> -		 * past the call to i915_request_get_rcu, but not executing
> -		 * that while still executing i915_request_put() creates
> -		 * havoc enough.  Prevent this with a compiler barrier.
> -		 */
> -		barrier();
> -
> -		request = i915_request_get_rcu(request);
> -
> -		/*
> -		 * What stops the following rcu_access_pointer() from occurring
> -		 * before the above i915_request_get_rcu()? If we were
> -		 * to read the value before pausing to get the reference to
> -		 * the request, we may not notice a change in the active
> -		 * tracker.
> -		 *
> -		 * The rcu_access_pointer() is a mere compiler barrier, which
> -		 * means both the CPU and compiler are free to perform the
> -		 * memory read without constraint. The compiler only has to
> -		 * ensure that any operations after the rcu_access_pointer()
> -		 * occur afterwards in program order. This means the read may
> -		 * be performed earlier by an out-of-order CPU, or adventurous
> -		 * compiler.
> -		 *
> -		 * The atomic operation at the heart of
> -		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
> -		 * atomic_inc_not_zero() which is only a full memory barrier
> -		 * when successful. That is, if i915_request_get_rcu()
> -		 * returns the request (and so with the reference counted
> -		 * incremented) then the following read for rcu_access_pointer()
> -		 * must occur after the atomic operation and so confirm
> -		 * that this request is the one currently being tracked.
> -		 *
> -		 * The corresponding write barrier is part of
> -		 * rcu_assign_pointer().
> -		 */
> -		if (!request || request == rcu_access_pointer(active->request))
> -			return rcu_pointer_handoff(request);
> -
> -		i915_request_put(request);
> -	} while (1);
> -}
> -
> +int __must_check
> +i915_active_fence_set(struct i915_active_fence *active,
> +		      struct i915_request *rq);
>   /**
> - * i915_active_request_get_unlocked - return a reference to the active request
> + * i915_active_fence_get - return a reference to the active fence
>    * @active - the active tracker
>    *
> - * i915_active_request_get_unlocked() returns a reference to the active request,
> + * i915_active_fence_get() returns a reference to the active fence,
>    * or NULL if the active tracker is idle. The reference is obtained under RCU,
>    * so no locking is required by the caller.
>    *
> - * The reference should be freed with i915_request_put().
> + * The reference should be freed with dma_fence_put().
>    */
> -static inline struct i915_request *
> -i915_active_request_get_unlocked(const struct i915_active_request *active)
> +static inline struct dma_fence *
> +i915_active_fence_get(struct i915_active_fence *active)
>   {
> -	struct i915_request *request;
> +	struct dma_fence *fence;
>   
>   	rcu_read_lock();
> -	request = __i915_active_request_get_rcu(active);
> +	fence = dma_fence_get_rcu_safe(&active->fence);
>   	rcu_read_unlock();
>   
> -	return request;
> +	return fence;
>   }
>   
>   /**
> - * i915_active_request_isset - report whether the active tracker is assigned
> + * i915_active_fence_isset - report whether the active tracker is assigned
>    * @active - the active tracker
>    *
> - * i915_active_request_isset() returns true if the active tracker is currently
> - * assigned to a request. Due to the lazy retiring, that request may be idle
> + * i915_active_fence_isset() returns true if the active tracker is currently
> + * assigned to a fence. Due to the lazy retiring, that fence may be idle
>    * and this may report stale information.
>    */
>   static inline bool
> -i915_active_request_isset(const struct i915_active_request *active)
> +i915_active_fence_isset(const struct i915_active_fence *active)
>   {
> -	return rcu_access_pointer(active->request);
> +	return rcu_access_pointer(active->fence);
>   }
>   
> -/**
> - * i915_active_request_retire - waits until the request is retired
> - * @active - the active request on which to wait
> - *
> - * i915_active_request_retire() waits until the request is completed,
> - * and then ensures that at least the retirement handler for this
> - * @active tracker is called before returning. If the @active
> - * tracker is idle, the function returns immediately.
> - */
> -static inline int __must_check
> -i915_active_request_retire(struct i915_active_request *active,
> -			   struct mutex *mutex)
> +static inline void
> +i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
>   {
> -	struct i915_request *request;
> -	long ret;
> -
> -	request = i915_active_request_raw(active, mutex);
> -	if (!request)
> -		return 0;
> -
> -	ret = i915_request_wait(request,
> -				I915_WAIT_INTERRUPTIBLE,
> -				MAX_SCHEDULE_TIMEOUT);
> -	if (ret < 0)
> -		return ret;
> +	struct i915_active_fence *active =
> +		container_of(cb, typeof(*active), cb);
>   
> -	list_del_init(&active->link);
> -	RCU_INIT_POINTER(active->request, NULL);
> -
> -	active->retire(active, request);
> -
> -	return 0;
> +	RCU_INIT_POINTER(active->fence, NULL);
>   }
>   
>   /*
> @@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active,
>    * synchronisation.
>    */
>   
> -void __i915_active_init(struct drm_i915_private *i915,
> -			struct i915_active *ref,
> +void __i915_active_init(struct i915_active *ref,
>   			int (*active)(struct i915_active *ref),
>   			void (*retire)(struct i915_active *ref),
>   			struct lock_class_key *key);
> -#define i915_active_init(i915, ref, active, retire) do {		\
> +#define i915_active_init(ref, active, retire) do {		\
>   	static struct lock_class_key __key;				\
>   									\
> -	__i915_active_init(i915, ref, active, retire, &__key);		\
> +	__i915_active_init(ref, active, retire, &__key);		\
>   } while (0)
>   
>   int i915_active_ref(struct i915_active *ref,
>   		    struct intel_timeline *tl,
> -		    struct i915_request *rq);
> +		    struct dma_fence *fence);
>   
>   static inline int
>   i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
>   {
> -	return i915_active_ref(ref, i915_request_timeline(rq), rq);
> +	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
>   }
>   
>   void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
>   
>   static inline bool i915_active_has_exclusive(struct i915_active *ref)
>   {
> -	return rcu_access_pointer(ref->excl);
> +	return rcu_access_pointer(ref->excl.fence);
>   }
>   
>   int i915_active_wait(struct i915_active *ref);
>   
> -int i915_request_await_active(struct i915_request *rq,
> -			      struct i915_active *ref);
> -int i915_request_await_active_request(struct i915_request *rq,
> -				      struct i915_active_request *active);
> +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
>   
>   int i915_active_acquire(struct i915_active *ref);
> +bool i915_active_acquire_if_busy(struct i915_active *ref);
>   void i915_active_release(struct i915_active *ref);
> -void __i915_active_release_nested(struct i915_active *ref, int subclass);
> -
> -bool i915_active_trygrab(struct i915_active *ref);
> -void i915_active_ungrab(struct i915_active *ref);
>   
>   static inline bool
>   i915_active_is_idle(const struct i915_active *ref)
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index 021167f0004d..d89a74c142c6 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -17,17 +17,9 @@
>   
>   #include "i915_utils.h"
>   
> -struct drm_i915_private;
> -struct i915_active_request;
> -struct i915_request;
> -
> -typedef void (*i915_active_retire_fn)(struct i915_active_request *,
> -				      struct i915_request *);
> -
> -struct i915_active_request {
> -	struct i915_request __rcu *request;
> -	struct list_head link;
> -	i915_active_retire_fn retire;
> +struct i915_active_fence {
> +	struct dma_fence __rcu *fence;
> +	struct dma_fence_cb cb;
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   	/*
>   	 * Incorporeal!
> @@ -53,20 +45,17 @@ struct active_node;
>   #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
>   
>   struct i915_active {
> -	struct drm_i915_private *i915;
> +	atomic_t count;
> +	struct mutex mutex;
>   
>   	struct active_node *cache;
>   	struct rb_root tree;
> -	struct mutex mutex;
> -	atomic_t count;
>   
>   	/* Preallocated "exclusive" node */
> -	struct dma_fence __rcu *excl;
> -	struct dma_fence_cb excl_cb;
> +	struct i915_active_fence excl;
>   
>   	unsigned long flags;
>   #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
> -#define I915_ACTIVE_GRAB_BIT 1
>   
>   	int (*active)(struct i915_active *ref);
>   	void (*retire)(struct i915_active *ref);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f50058cf8ab8..64890627d638 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
>   
>   	spin_lock_irqsave(&timelines->lock, flags);
>   	list_for_each_entry(tl, &timelines->active_list, link) {
> -		struct i915_request *rq;
> +		struct dma_fence *fence;
>   
> -		rq = i915_active_request_get_unlocked(&tl->last_request);
> -		if (!rq)
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (!fence)
>   			continue;
>   
>   		spin_unlock_irqrestore(&timelines->lock, flags);
>   
> -		/*
> -		 * "Race-to-idle".
> -		 *
> -		 * Switching to the kernel context is often used a synchronous
> -		 * step prior to idling, e.g. in suspend for flushing all
> -		 * current operations to memory before sleeping. These we
> -		 * want to complete as quickly as possible to avoid prolonged
> -		 * stalls, so allow the gpu to boost to maximum clocks.
> -		 */
> -		if (wait & I915_WAIT_FOR_IDLE_BOOST)
> -			gen6_rps_boost(rq);
> +		if (!dma_fence_is_i915(fence)) {
> +			timeout = dma_fence_wait_timeout(fence,
> +							 flags & I915_WAIT_INTERRUPTIBLE,
> +							 timeout);
> +		} else {
> +			struct i915_request *rq = to_request(fence);
> +
> +			/*
> +			 * "Race-to-idle".
> +			 *
> +			 * Switching to the kernel context is often used as
> +			 * a synchronous step prior to idling, e.g. in suspend
> +			 * for flushing all current operations to memory before
> +			 * sleeping. These we want to complete as quickly as
> +			 * possible to avoid prolonged stalls, so allow the gpu
> +			 * to boost to maximum clocks.
> +			 */
> +			if (flags & I915_WAIT_FOR_IDLE_BOOST)
> +				gen6_rps_boost(rq);
> +
> +			timeout = i915_request_wait(rq, flags, timeout);
> +		}
>   
> -		timeout = i915_request_wait(rq, wait, timeout);
> -		i915_request_put(rq);
> +		dma_fence_put(fence);
>   		if (timeout < 0)
>   			return timeout;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 55cebf256d03..7462d87f7a48 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = {
>   
>   static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
>   {
> -	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
>   	struct i915_vma *vma;
>   
> @@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	i915_active_init(i915, &vma->active, NULL, NULL);
> +	i915_active_init(&vma->active, NULL, NULL);
>   
>   	mutex_init(&vma->pages_mutex);
>   	vma->vm = i915_vm_get(&ggtt->vm);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6384a06aa5bf..a28ee754b7b4 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next,
>   	if (!c)
>   		return next;
>   
> -	if (!i915_active_trygrab(&vma->active)) {
> +	if (!i915_active_acquire_if_busy(&vma->active)) {
>   		kfree(c);
>   		return next;
>   	}
> @@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
>   			*this->slot =
>   				i915_error_object_create(i915, vma, compress);
>   
> -			i915_active_ungrab(&vma->active);
> +			i915_active_release(&vma->active);
>   			i915_vma_put(vma);
>   
>   			capture = this->next;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a8916412759b..4ffe62a42186 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq)
>   
>   static bool i915_request_retire(struct i915_request *rq)
>   {
> -	struct i915_active_request *active, *next;
> -
>   	if (!i915_request_completed(rq))
>   		return false;
>   
> @@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq)
>   				  &i915_request_timeline(rq)->requests));
>   	rq->ring->head = rq->postfix;
>   
> -	/*
> -	 * Walk through the active list, calling retire on each. This allows
> -	 * objects to track their GPU activity and mark themselves as idle
> -	 * when their *last* active request is completed (updating state
> -	 * tracking lists for eviction, active references for GEM, etc).
> -	 *
> -	 * As the ->retire() may free the node, we decouple it first and
> -	 * pass along the auxiliary information (to avoid dereferencing
> -	 * the node after the callback).
> -	 */
> -	list_for_each_entry_safe(active, next, &rq->active_list, link) {
> -		/*
> -		 * In microbenchmarks or focusing upon time inside the kernel,
> -		 * we may spend an inordinate amount of time simply handling
> -		 * the retirement of requests and processing their callbacks.
> -		 * Of which, this loop itself is particularly hot due to the
> -		 * cache misses when jumping around the list of
> -		 * i915_active_request.  So we try to keep this loop as
> -		 * streamlined as possible and also prefetch the next
> -		 * i915_active_request to try and hide the likely cache miss.
> -		 */
> -		prefetchw(next);
> -
> -		INIT_LIST_HEAD(&active->link);
> -		RCU_INIT_POINTER(active->request, NULL);
> -
> -		active->retire(active, rq);
> -	}
> -
>   	local_irq_disable();
>   
>   	/*
> @@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	rq->flags = 0;
>   	rq->execution_mask = ALL_ENGINES;
>   
> -	INIT_LIST_HEAD(&rq->active_list);
>   	INIT_LIST_HEAD(&rq->execute_cb);
>   
>   	/*
> @@ -743,7 +711,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	ce->ring->emit = rq->head;
>   
>   	/* Make sure we didn't add ourselves to external state before freeing */
> -	GEM_BUG_ON(!list_empty(&rq->active_list));
>   	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
>   	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
>   
> @@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>   	 * precludes optimising to use semaphores serialisation of a single
>   	 * timeline across engines.
>   	 */
> -	prev = rcu_dereference_protected(timeline->last_request.request,
> -					 lockdep_is_held(&timeline->mutex));
> +	prev = to_request(__i915_active_fence_set(&timeline->last_request,
> +						  &rq->fence));
>   	if (prev && !i915_request_completed(prev)) {
>   		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
>   			i915_sw_fence_await_sw_fence(&rq->submit,
> @@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>   	 * us, the timeline will hold its seqno which is later than ours.
>   	 */
>   	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
> -	__i915_active_request_set(&timeline->last_request, rq);
>   
>   	return prev;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index ec5bb4c2e5ae..91a885c36c6b 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -211,7 +211,6 @@ struct i915_request {
>   	 * on the active_list (of their final request).
>   	 */
>   	struct i915_capture_list *capture_list;
> -	struct list_head active_list;
>   
>   	/** Time at which this request was emitted, in jiffies. */
>   	unsigned long emitted_jiffies;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index e191247c7c5f..9fdcd4e2c799 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   	vma->size = obj->base.size;
>   	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
>   
> -	i915_active_init(vm->i915, &vma->active,
> -			 __i915_vma_active, __i915_vma_retire);
> +	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
>   
>   	/* Declare ourselves safe for use inside shrinkers */
>   	if (IS_ENABLED(CONFIG_LOCKDEP)) {
> @@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
>   	if (ret)
>   		return ret;
>   
> +	GEM_BUG_ON(i915_vma_is_active(vma));
>   	if (i915_vma_is_pinned(vma)) {
>   		vma_print_allocator(vma, "is pinned");
>   		return -EBUSY;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> index a41785822ed9..2cc71bcf884f 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_active.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
>   		return NULL;
>   
>   	kref_init(&active->ref);
> -	i915_active_init(i915, &active->base, __live_active, __live_retire);
> +	i915_active_init(&active->base, __live_active, __live_retire);
>   
>   	return active;
>   }
> @@ -146,19 +146,13 @@ static int live_active_wait(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
>   	struct live_active *active;
> -	intel_wakeref_t wakeref;
>   	int err = 0;
>   
>   	/* Check that we get a callback when requests retire upon waiting */
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> -
>   	active = __live_active_setup(i915);
> -	if (IS_ERR(active)) {
> -		err = PTR_ERR(active);
> -		goto err;
> -	}
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
>   
>   	i915_active_wait(&active->base);
>   	if (!READ_ONCE(active->retired)) {
> @@ -168,11 +162,9 @@ static int live_active_wait(void *arg)
>   
>   	__live_put(active);
>   
> +	mutex_lock(&i915->drm.struct_mutex);
>   	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>   		err = -EIO;
> -
> -err:
> -	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	return err;
> @@ -182,23 +174,19 @@ static int live_active_retire(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
>   	struct live_active *active;
> -	intel_wakeref_t wakeref;
>   	int err = 0;
>   
>   	/* Check that we get a callback when requests are indirectly retired */
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> -
>   	active = __live_active_setup(i915);
> -	if (IS_ERR(active)) {
> -		err = PTR_ERR(active);
> -		goto err;
> -	}
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
>   
>   	/* waits for & retires all requests */
> +	mutex_lock(&i915->drm.struct_mutex);
>   	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>   		err = -EIO;
> +	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	if (!READ_ONCE(active->retired)) {
>   		pr_err("i915_active not retired after flushing!\n");
> @@ -207,10 +195,6 @@ static int live_active_retire(void *arg)
>   
>   	__live_put(active);
>   
> -err:
> -	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	return err;
>   }
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 50+ messages in thread
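
A note on the pattern at the heart of the conversion above: i915_active_fence
reduces the old hand-rolled RCU dance to an RCU-protected fence pointer plus a
dma_fence callback (i915_active_fence_cb) that clears the pointer once the
fence signals. Below is a minimal, hedged sketch of the consumer side, using
only the dma-fence API visible in the patch; flush_tracker() is an
illustrative name, not a driver function, and the fragment assumes the i915
headers quoted above:

	/* Sketch: safely acquire an RCU-tracked fence and wait on it. */
	static long flush_tracker(struct i915_active_fence *active, long timeout)
	{
		struct dma_fence *fence;

		/*
		 * dma_fence_get_rcu_safe() re-checks the pointer after
		 * taking the reference, so a concurrent reassignment is
		 * never mistaken for the fence we meant to wait on. This
		 * replaces the long barrier commentary deleted above.
		 */
		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&active->fence);
		rcu_read_unlock();
		if (!fence)
			return timeout; /* tracker idle: nothing to wait for */

		timeout = dma_fence_wait_timeout(fence, true, timeout);
		dma_fence_put(fence);
		return timeout;
	}

This is the shape of the wait_for_timelines() hunk above, minus its
race-to-idle boost special case for native i915 requests.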

* Re: [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking
  2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
@ 2019-10-02 15:53   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Matthew Auld


On 02/10/2019 12:19, Chris Wilson wrote:
> Now that we can retire without taking struct_mutex, we can do so to
> handle shrinking the mmap-offset space after an allocation failure.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++------------
>   1 file changed, 5 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 418d0d2b5fa9..45bbd22c14f1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -431,19 +431,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
>   		return 0;
>   
>   	/* Attempt to reap some mmap space from dead objects */
> -	do {
> -		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> -		if (err)
> -			break;
> +	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
> +	if (err)
> +		return err;
>   
> -		i915_gem_drain_freed_objects(i915);
> -		err = drm_gem_create_mmap_offset(&obj->base);
> -		if (!err)
> -			break;
> -
> -	} while (flush_delayed_work(&i915->gem.retire_work));
> -
> -	return err;
> +	i915_gem_drain_freed_objects(i915);
> +	return drm_gem_create_mmap_offset(&obj->base);
>   }
>   
>   int
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 50+ messages in thread
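
For reference, the patched function now reads as a single retire-and-retry
pass. A hedged paraphrase of the resulting create_mmap_offset(), condensed
from the diff above rather than copied from the tree:

	static int create_mmap_offset(struct drm_i915_gem_object *obj)
	{
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
		int err;

		err = drm_gem_create_mmap_offset(&obj->base);
		if (!err)
			return 0;

		/* Retire every completed request, without struct_mutex... */
		err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
		if (err)
			return err;

		/* ...reap the objects they pinned, then retry once. */
		i915_gem_drain_freed_objects(i915);
		return drm_gem_create_mmap_offset(&obj->base);
	}

The old loop around flush_delayed_work() is gone because retiring no longer
depends on the background worker making forward progress.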

* Re: [PATCH 14/30] drm/i915: Move request runtime management onto gt
  2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
@ 2019-10-02 15:58   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> Requests are run from the gt and are tied into the gt runtime power
> management, so pull the runtime request management under gt/.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  28 +---
>   .../drm/i915/gem/selftests/i915_gem_context.c |   5 +-
>   .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 +-
>   drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   5 +-
>   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 123 ++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  24 ++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h      |  11 ++
>   drivers/gpu/drm/i915/gt/selftest_timeline.c   |   8 +-
>   drivers/gpu/drm/i915/i915_debugfs.c           |  17 +--
>   drivers/gpu/drm/i915/i915_drv.h               |  10 --
>   drivers/gpu/drm/i915/i915_gem.c               |  17 ---
>   drivers/gpu/drm/i915/i915_gem_evict.c         |  14 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
>   drivers/gpu/drm/i915/i915_request.c           |  64 +--------
>   drivers/gpu/drm/i915/i915_request.h           |   7 +-
>   .../gpu/drm/i915/selftests/igt_flush_test.c   |   9 +-
>   .../gpu/drm/i915/selftests/igt_live_test.c    |   5 +-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  10 +-
>   21 files changed, 213 insertions(+), 158 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index d2b53b5add81..06e1876d0250 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -83,6 +83,7 @@ gt-y += \
>   	gt/intel_gt_irq.o \
>   	gt/intel_gt_pm.o \
>   	gt/intel_gt_pm_irq.o \
> +	gt/intel_gt_requests.o \
>   	gt/intel_hangcheck.o \
>   	gt/intel_lrc.o \
>   	gt/intel_rc6.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 45bbd22c14f1..fd4122d8c0a9 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -8,6 +8,7 @@
>   #include <linux/sizes.h>
>   
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_gem_gtt.h"
> @@ -424,6 +425,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
>   static int create_mmap_offset(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	struct intel_gt *gt = &i915->gt;
>   	int err;
>   
>   	err = drm_gem_create_mmap_offset(&obj->base);
> @@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
>   		return 0;
>   
>   	/* Attempt to reap some mmap space from dead objects */
> -	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
> +	err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT);
>   	if (err)
>   		return err;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 90b211257f2d..9194d8464bf7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -7,31 +7,18 @@
>   #include "gem/i915_gem_pm.h"
>   #include "gt/intel_gt.h"
>   #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_globals.h"
>   
>   static void i915_gem_park(struct drm_i915_private *i915)
>   {
> -	cancel_delayed_work(&i915->gem.retire_work);
> -
>   	i915_vma_parked(i915);
>   
>   	i915_globals_park();
>   }
>   
> -static void retire_work_handler(struct work_struct *work)
> -{
> -	struct drm_i915_private *i915 =
> -		container_of(work, typeof(*i915), gem.retire_work.work);
> -
> -	i915_retire_requests(i915);
> -
> -	queue_delayed_work(i915->wq,
> -			   &i915->gem.retire_work,
> -			   round_jiffies_up_relative(HZ));
> -}
> -
>   static int pm_notifier(struct notifier_block *nb,
>   		       unsigned long action,
>   		       void *data)
> @@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb,
>   	switch (action) {
>   	case INTEL_GT_UNPARK:
>   		i915_globals_unpark();
> -		queue_delayed_work(i915->wq,
> -				   &i915->gem.retire_work,
> -				   round_jiffies_up_relative(HZ));
>   		break;
>   
>   	case INTEL_GT_PARK:
> @@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
>   {
>   	bool result = !intel_gt_is_wedged(gt);
>   
> -	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> +	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
>   		/* XXX hide warning from gem_eio */
>   		if (i915_modparams.reset) {
>   			dev_err(gt->i915->drm.dev,
> @@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 * state. Fortunately, the kernel_context is disposable and we do
>   	 * not rely on its state.
>   	 */
> -	switch_to_kernel_context_sync(&i915->gt);
> +	intel_gt_suspend(&i915->gt);
> +	intel_uc_suspend(&i915->gt.uc);
>   
>   	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
>   
>   	i915_gem_drain_freed_objects(i915);
> -
> -	intel_uc_suspend(&i915->gt.uc);
> -	intel_gt_suspend(&i915->gt);
>   }
>   
>   static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
> @@ -239,8 +221,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   
>   void i915_gem_init__pm(struct drm_i915_private *i915)
>   {
> -	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
> -
>   	i915->gem.pm_notifier.notifier_call = pm_notifier;
>   	blocking_notifier_chain_register(&i915->gt.pm_notifications,
>   					 &i915->gem.pm_notifier);
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index f902aeee1755..2288757808ae 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -8,6 +8,7 @@
>   
>   #include "gem/i915_gem_pm.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/intel_reset.h"
>   #include "i915_selftest.h"
>   
> @@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm,
>   	int err;
>   
>   	/* Keep in GEM's good graces */
> -	i915_retire_requests(vm->i915);
> +	intel_gt_retire_requests(vm->gt);
>   
>   	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
>   	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
> @@ -1136,7 +1137,7 @@ __sseu_finish(const char *name,
>   		igt_spinner_end(spin);
>   
>   	if ((flags & TEST_IDLE) && ret == 0) {
> -		ret = i915_gem_wait_for_idle(ce->engine->i915,
> +		ret = intel_gt_wait_for_idle(ce->engine->gt,
>   					     MAX_SCHEDULE_TIMEOUT);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index 4ba6ed5c8313..1cd25cfd0246 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -573,7 +573,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>   {
>   	i915_gem_driver_unregister__shrinker(i915);
>   	intel_gt_pm_get(&i915->gt);
> -	cancel_delayed_work_sync(&i915->gem.retire_work);
> +	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
>   }
>   
>   static void restore_retire_worker(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 7205595369be..53220741e49e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -6,6 +6,7 @@
>   #include "i915_drv.h"
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
>   #include "intel_mocs.h"
>   #include "intel_rc6.h"
>   #include "intel_uncore.h"
> @@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
>   
>   	intel_gt_init_hangcheck(gt);
>   	intel_gt_init_reset(gt);
> +	intel_gt_init_requests(gt);
>   	intel_gt_pm_init_early(gt);
>   	intel_uc_init_early(&gt->uc);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index bdb34f03ec47..d2e80ba64d69 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -10,6 +10,7 @@
>   #include "intel_engine_pm.h"
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
>   #include "intel_pm.h"
>   #include "intel_rc6.h"
>   #include "intel_wakeref.h"
> @@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
>   	i915_pmu_gt_unparked(i915);
>   
>   	intel_gt_queue_hangcheck(gt);
> +	intel_gt_unpark_requests(gt);
>   
>   	pm_notify(gt, INTEL_GT_UNPARK);
>   
> @@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf)
>   	GEM_TRACE("\n");
>   
>   	pm_notify(gt, INTEL_GT_PARK);
> +	intel_gt_park_requests(gt);
>   
>   	i915_pmu_gt_parked(i915);
>   	if (INTEL_GEN(i915) >= 6)
> @@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt)
>   
>   static void wait_for_idle(struct intel_gt *gt)
>   {
> -	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> +	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
>   		/*
>   		 * Forcibly cancel outstanding work and leave
>   		 * the gpu quiet.
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> new file mode 100644
> index 000000000000..8aed89fd2cdc
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> @@ -0,0 +1,123 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_request.h"
> +#include "intel_gt.h"
> +#include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
> +#include "intel_timeline.h"
> +
> +static void retire_requests(struct intel_timeline *tl)
> +{
> +	struct i915_request *rq, *rn;
> +
> +	list_for_each_entry_safe(rq, rn, &tl->requests, link)
> +		if (!i915_request_retire(rq))
> +			break;
> +}
> +
> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
> +{
> +	struct intel_gt_timelines *timelines = &gt->timelines;
> +	struct intel_timeline *tl, *tn;
> +	unsigned long active_count = 0;
> +	unsigned long flags;
> +	bool interruptible;
> +	LIST_HEAD(free);
> +
> +	interruptible = true;
> +	if (unlikely(timeout < 0))
> +		timeout = -timeout, interruptible = false;
> +
> +	spin_lock_irqsave(&timelines->lock, flags);
> +	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
> +		if (!mutex_trylock(&tl->mutex))
> +			continue;
> +
> +		intel_timeline_get(tl);
> +		GEM_BUG_ON(!tl->active_count);
> +		tl->active_count++; /* pin the list element */
> +		spin_unlock_irqrestore(&timelines->lock, flags);
> +
> +		if (timeout > 0) {
> +			struct dma_fence *fence;
> +
> +			fence = i915_active_fence_get(&tl->last_request);
> +			if (fence) {
> +				timeout = dma_fence_wait_timeout(fence,
> +								 true,
> +								 timeout);
> +				dma_fence_put(fence);
> +			}
> +		}
> +
> +		retire_requests(tl);
> +
> +		spin_lock_irqsave(&timelines->lock, flags);
> +
> +		/* Resume iteration after dropping lock */
> +		list_safe_reset_next(tl, tn, link);
> +		if (--tl->active_count)
> +			active_count += !!rcu_access_pointer(tl->last_request.fence);
> +		else
> +			list_del(&tl->link);
> +
> +		mutex_unlock(&tl->mutex);
> +
> +		/* Defer the final release to after the spinlock */
> +		if (refcount_dec_and_test(&tl->kref.refcount)) {
> +			GEM_BUG_ON(tl->active_count);
> +			list_add(&tl->link, &free);
> +		}
> +	}
> +	spin_unlock_irqrestore(&timelines->lock, flags);
> +
> +	list_for_each_entry_safe(tl, tn, &free, link)
> +		__intel_timeline_free(&tl->kref);
> +
> +	return active_count ? timeout : 0;
> +}
> +
> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
> +{
> +	/* If the device is asleep, we have no requests outstanding */
> +	if (!intel_gt_pm_is_awake(gt))
> +		return 0;
> +
> +	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
> +		cond_resched();
> +		if (signal_pending(current))
> +			return -EINTR;
> +	}
> +
> +	return timeout;
> +}
> +
> +static void retire_work_handler(struct work_struct *work)
> +{
> +	struct intel_gt *gt =
> +		container_of(work, typeof(*gt), requests.retire_work.work);
> +
> +	intel_gt_retire_requests(gt);
> +	schedule_delayed_work(&gt->requests.retire_work,
> +			      round_jiffies_up_relative(HZ));
> +}
> +
> +void intel_gt_init_requests(struct intel_gt *gt)
> +{
> +	INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
> +}
> +
> +void intel_gt_park_requests(struct intel_gt *gt)
> +{
> +	cancel_delayed_work(&gt->requests.retire_work);
> +}
> +
> +void intel_gt_unpark_requests(struct intel_gt *gt)
> +{
> +	schedule_delayed_work(&gt->requests.retire_work,
> +			      round_jiffies_up_relative(HZ));
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> new file mode 100644
> index 000000000000..bd31cbce47e0
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> @@ -0,0 +1,24 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_GT_REQUESTS_H
> +#define INTEL_GT_REQUESTS_H
> +
> +struct intel_gt;
> +
> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
> +static inline void intel_gt_retire_requests(struct intel_gt *gt)
> +{
> +	intel_gt_retire_requests_timeout(gt, 0);
> +}
> +
> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
> +
> +void intel_gt_init_requests(struct intel_gt *gt);
> +void intel_gt_park_requests(struct intel_gt *gt);
> +void intel_gt_unpark_requests(struct intel_gt *gt);
> +
> +#endif /* INTEL_GT_REQUESTS_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 7134f1319bbe..802f516a3430 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -50,6 +50,17 @@ struct intel_gt {
>   		struct list_head hwsp_free_list;
>   	} timelines;
>   
> +	struct intel_gt_requests {
> +		/**
> +		 * We leave the user IRQ off as much as possible,
> +		 * but this means that requests will finish and never
> +		 * be retired once the system goes idle. Set a timer to
> +		 * fire periodically while the ring is running. When it
> +		 * fires, go retire requests.
> +		 */
> +		struct delayed_work retire_work;
> +	} requests;
> +
>   	struct intel_wakeref wakeref;
>   	atomic_t user_wakeref;
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index 16abfabf08c7..d6df40cdc8a6 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -8,6 +8,7 @@
>   
>   #include "intel_engine_pm.h"
>   #include "intel_gt.h"
> +#include "intel_gt_requests.h"
>   
>   #include "../selftests/i915_random.h"
>   #include "../i915_selftest.h"
> @@ -641,6 +642,7 @@ static int live_hwsp_alternate(void *arg)
>   static int live_hwsp_wrap(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> +	struct intel_gt *gt = &i915->gt;
>   	struct intel_engine_cs *engine;
>   	struct intel_timeline *tl;
>   	enum intel_engine_id id;
> @@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg)
>   	 * foreign GPU references.
>   	 */
>   
> -	tl = intel_timeline_create(&i915->gt, NULL);
> +	tl = intel_timeline_create(gt, NULL);
>   	if (IS_ERR(tl))
>   		return PTR_ERR(tl);
>   
> @@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg)
>   	if (err)
>   		goto out_free;
>   
> -	for_each_engine(engine, i915, id) {
> +	for_each_engine(engine, gt->i915, id) {
>   		const u32 *hwsp_seqno[2];
>   		struct i915_request *rq;
>   		u32 seqno[2];
> @@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg)
>   			goto out;
>   		}
>   
> -		i915_retire_requests(i915); /* recycle HWSP */
> +		intel_gt_retire_requests(gt); /* recycle HWSP */
>   	}
>   
>   out:
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5888a658e2b7..2afc41e43b6e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -41,6 +41,7 @@
>   
>   #include "gem/i915_gem_context.h"
>   #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/intel_reset.h"
>   #include "gt/intel_rc6.h"
>   #include "gt/uc/intel_guc_submission.h"
> @@ -3621,33 +3622,33 @@ static int
>   i915_drop_caches_set(void *data, u64 val)
>   {
>   	struct drm_i915_private *i915 = data;
> +	struct intel_gt *gt = &i915->gt;
>   	int ret;
>   
>   	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
>   		  val, val & DROP_ALL);
>   
>   	if (val & DROP_RESET_ACTIVE &&
> -	    wait_for(intel_engines_are_idle(&i915->gt),
> -		     I915_IDLE_ENGINES_TIMEOUT))
> -		intel_gt_set_wedged(&i915->gt);
> +	    wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT))
> +		intel_gt_set_wedged(gt);
>   
>   	if (val & DROP_RETIRE)
> -		i915_retire_requests(i915);
> +		intel_gt_retire_requests(gt);
>   
>   	if (val & (DROP_IDLE | DROP_ACTIVE)) {
> -		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> +		ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>   		if (ret)
>   			return ret;
>   	}
>   
>   	if (val & DROP_IDLE) {
> -		ret = intel_gt_pm_wait_for_idle(&i915->gt);
> +		ret = intel_gt_pm_wait_for_idle(gt);
>   		if (ret)
>   			return ret;
>   	}
>   
> -	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
> -		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
> +	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
> +		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
>   
>   	fs_reclaim_acquire(GFP_KERNEL);
>   	if (val & DROP_BOUND)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 44f3463ff9f1..cb63b2bd0ce8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1710,15 +1710,6 @@ struct drm_i915_private {
>   
>   	struct {
>   		struct notifier_block pm_notifier;
> -
> -		/**
> -		 * We leave the user IRQ off as much as possible,
> -		 * but this means that requests will finish and never
> -		 * be retired once the system goes idle. Set a timer to
> -		 * fire periodically while the ring is running. When it
> -		 * fires, go retire requests.
> -		 */
> -		struct delayed_work retire_work;
>   	} gem;
>   
>   	/* For i945gm vblank irq vs. C3 workaround */
> @@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
>   void i915_gem_driver_unregister(struct drm_i915_private *i915);
>   void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
>   void i915_gem_driver_release(struct drm_i915_private *dev_priv);
> -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
>   void i915_gem_suspend(struct drm_i915_private *dev_priv);
>   void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
>   void i915_gem_resume(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7c82fc39f655..5a664bdead8c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
>   	}
>   }
>   
> -int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
> -{
> -	struct intel_gt *gt = &i915->gt;
> -
> -	/* If the device is asleep, we have no requests outstanding */
> -	if (!intel_gt_pm_is_awake(gt))
> -		return 0;
> -
> -	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
> -		cond_resched();
> -		if (signal_pending(current))
> -			return -EINTR;
> -	}
> -
> -	return timeout;
> -}
> -
>   struct i915_vma *
>   i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>   			 const struct i915_ggtt_view *view,
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 0a412f6d01d7..7e62c310290f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -29,6 +29,7 @@
>   #include <drm/i915_drm.h>
>   
>   #include "gem/i915_gem_context.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> @@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
>   	bool fail_if_busy:1;
>   } igt_evict_ctl;)
>   
> -static int ggtt_flush(struct drm_i915_private *i915)
> +static int ggtt_flush(struct intel_gt *gt)
>   {
>   	/*
>   	 * Not everything in the GGTT is tracked via vma (otherwise we
> @@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
>   	 * the hopes that we can then remove contexts and the like only
>   	 * bound by their active reference.
>   	 */
> -	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> +	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>   }
>   
>   static bool
> @@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   			 u64 start, u64 end,
>   			 unsigned flags)
>   {
> -	struct drm_i915_private *dev_priv = vm->i915;
>   	struct drm_mm_scan scan;
>   	struct list_head eviction_list;
>   	struct i915_vma *vma, *next;
> @@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   				    min_size, alignment, color,
>   				    start, end, mode);
>   
> -	i915_retire_requests(vm->i915);
> +	intel_gt_retire_requests(vm->gt);
>   
>   search_again:
>   	active = NULL;
> @@ -197,7 +197,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
>   		return -EBUSY;
>   
> -	ret = ggtt_flush(dev_priv);
> +	ret = ggtt_flush(vm->gt);
>   	if (ret)
>   		return ret;
>   
> @@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   	 * a stray pin (preventing eviction) that can only be resolved by
>   	 * retiring.
>   	 */
> -	i915_retire_requests(vm->i915);
> +	intel_gt_retire_requests(vm->gt);
>   
>   	if (i915_vm_has_cache_coloring(vm)) {
>   		/* Expand search to cover neighbouring guard pages (or lack!) */
> @@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   	 * switch otherwise is ineffective.
>   	 */
>   	if (i915_is_ggtt(vm)) {
> -		ret = ggtt_flush(vm->i915);
> +		ret = ggtt_flush(vm->gt);
>   		if (ret)
>   			return ret;
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 082fcf9085a6..1d26634ca597 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -38,6 +38,7 @@
>   
>   #include "display/intel_frontbuffer.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_scatterlist.h"
> @@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
>   
>   	if (unlikely(ggtt->do_idle_maps)) {
>   		/* XXX This does not prevent more requests being submitted! */
> -		if (i915_retire_requests_timeout(dev_priv,
> -						 -MAX_SCHEDULE_TIMEOUT)) {
> +		if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
> +						     -MAX_SCHEDULE_TIMEOUT)) {
>   			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
>   			/* Wait a bit, in hopes it avoids the hang */
>   			udelay(10);
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 52f7c4e5b644..437f9fc6282e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq)
>   	spin_unlock(&locked->active.lock);
>   }
>   
> -static bool i915_request_retire(struct i915_request *rq)
> +bool i915_request_retire(struct i915_request *rq)
>   {
>   	if (!i915_request_completed(rq))
>   		return false;
> @@ -1508,68 +1508,6 @@ long i915_request_wait(struct i915_request *rq,
>   	return timeout;
>   }
>   
> -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
> -{
> -	struct intel_gt_timelines *timelines = &i915->gt.timelines;
> -	struct intel_timeline *tl, *tn;
> -	unsigned long active_count = 0;
> -	unsigned long flags;
> -	bool interruptible;
> -	LIST_HEAD(free);
> -
> -	interruptible = true;
> -	if (timeout < 0)
> -		timeout = -timeout, interruptible = false;
> -
> -	spin_lock_irqsave(&timelines->lock, flags);
> -	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
> -		if (!mutex_trylock(&tl->mutex))
> -			continue;
> -
> -		intel_timeline_get(tl);
> -		GEM_BUG_ON(!tl->active_count);
> -		tl->active_count++; /* pin the list element */
> -		spin_unlock_irqrestore(&timelines->lock, flags);
> -
> -		if (timeout > 0) {
> -			struct dma_fence *fence;
> -
> -			fence = i915_active_fence_get(&tl->last_request);
> -			if (fence) {
> -				timeout = dma_fence_wait_timeout(fence,
> -								 interruptible,
> -								 timeout);
> -				dma_fence_put(fence);
> -			}
> -		}
> -
> -		retire_requests(tl);
> -
> -		spin_lock_irqsave(&timelines->lock, flags);
> -
> -		/* Resume iteration after dropping lock */
> -		list_safe_reset_next(tl, tn, link);
> -		if (--tl->active_count)
> -			active_count += !!rcu_access_pointer(tl->last_request.fence);
> -		else
> -			list_del(&tl->link);
> -
> -		mutex_unlock(&tl->mutex);
> -
> -		/* Defer the final release to after the spinlock */
> -		if (refcount_dec_and_test(&tl->kref.refcount)) {
> -			GEM_BUG_ON(tl->active_count);
> -			list_add(&tl->link, &free);
> -		}
> -	}
> -	spin_unlock_irqrestore(&timelines->lock, flags);
> -
> -	list_for_each_entry_safe(tl, tn, &free, link)
> -		__intel_timeline_free(&tl->kref);
> -
> -	return active_count ? timeout : 0;
> -}
> -
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/mock_request.c"
>   #include "selftests/i915_request.c"
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 256b0715180f..6a95242b280d 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request);
>   void __i915_request_queue(struct i915_request *rq,
>   			  const struct i915_sched_attr *attr);
>   
> +bool i915_request_retire(struct i915_request *rq);
>   void i915_request_retire_upto(struct i915_request *rq);
>   
>   static inline struct i915_request *
> @@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq)
>   					 lockdep_is_held(&rq->engine->active.lock));
>   }
>   
> -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
> -static inline void i915_retire_requests(struct drm_i915_private *i915)
> -{
> -	i915_retire_requests_timeout(i915, 0);
> -}
> -
>   #endif /* I915_REQUEST_H */
> diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> index ed496bd6d84f..7b0939e3f007 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> @@ -4,8 +4,8 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> -#include "gem/i915_gem_context.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_selftest.h"
> @@ -14,11 +14,12 @@
>   
>   int igt_flush_test(struct drm_i915_private *i915)
>   {
> -	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
> +	struct intel_gt *gt = &i915->gt;
> +	int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
>   
>   	cond_resched();
>   
> -	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
> +	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
>   		pr_err("%pS timed out, cancelling all further testing.\n",
>   		       __builtin_return_address(0));
>   
> @@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915)
>   			  __builtin_return_address(0));
>   		GEM_TRACE_DUMP();
>   
> -		intel_gt_set_wedged(&i915->gt);
> +		intel_gt_set_wedged(gt);
>   		ret = -EIO;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
> index eae90f97df6c..810b60100c2c 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
> @@ -4,7 +4,8 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> -#include "../i915_drv.h"
> +#include "i915_drv.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "../i915_selftest.h"
>   #include "igt_flush_test.h"
> @@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t,
>   	t->func = func;
>   	t->name = name;
>   
> -	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> +	err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT);
>   	if (err) {
>   		pr_err("%s(%s): failed to idle before, with err=%d!",
>   		       func, name, err);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 3b589bbb2c2d..4e6cde0d4859 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -26,6 +26,7 @@
>   #include <linux/pm_runtime.h>
>   
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/mock_engine.h"
>   
>   #include "mock_request.h"
> @@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915)
>   	do {
>   		for_each_engine(engine, i915, id)
>   			mock_engine_flush(engine);
> -	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
> +	} while (intel_gt_retire_requests_timeout(&i915->gt,
> +						  MAX_SCHEDULE_TIMEOUT));
>   }
>   
>   static void mock_device_release(struct drm_device *dev)
> @@ -98,10 +100,6 @@ static void release_dev(struct device *dev)
>   	kfree(pdev);
>   }
>   
> -static void mock_retire_work_handler(struct work_struct *work)
> -{
> -}
> -
>   static int pm_domain_resume(struct device *dev)
>   {
>   	return pm_generic_runtime_resume(dev);
> @@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void)
>   
>   	mock_init_contexts(i915);
>   
> -	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
> -
>   	intel_timelines_init(i915);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 50+ messages in thread
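
Two implementation details in the new gt/intel_gt_requests.c are worth
calling out. First, intel_gt_retire_requests_timeout() encodes
interruptibility in the sign of the timeout: a negative value requests an
uninterruptible wait of the corresponding magnitude, which is how
i915_gem_gtt_finish_pages() passes -MAX_SCHEDULE_TIMEOUT. Note, though, that
the moved copy passes true to dma_fence_wait_timeout() where the deleted
original passed interruptible, so the uninterruptible mode looks computed but
no longer honoured; presumably unintended. Second, the retire worker is the
classic self-arming delayed work: the handler does one pass and re-queues
itself, while park and unpark cancel and re-schedule the tick. A stripped-down
sketch of that lifecycle, with generic names rather than the driver's:

	#include <linux/workqueue.h>
	#include <linux/timer.h>	/* round_jiffies_up_relative() */

	struct retire_ctx {
		struct delayed_work work;
	};

	static void retire_tick(struct work_struct *work)
	{
		struct retire_ctx *ctx =
			container_of(work, typeof(*ctx), work.work);

		/* ... retire completed requests here ... */

		/* Re-arm: roughly one pass per second while unparked. */
		schedule_delayed_work(&ctx->work,
				      round_jiffies_up_relative(HZ));
	}

	static void retire_init(struct retire_ctx *ctx)
	{
		INIT_DELAYED_WORK(&ctx->work, retire_tick);
	}

	static void retire_park(struct retire_ctx *ctx)
	{
		/*
		 * Note: cancel_delayed_work() does not flush, so a
		 * concurrently running handler may re-arm once more;
		 * use cancel_delayed_work_sync() where that matters.
		 */
		cancel_delayed_work(&ctx->work);
	}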

* Re: [PATCH 22/30] drm/i915: Remove logical HW ID
  2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
@ 2019-10-02 16:07   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 16:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> With the introduction of ctx->engines[] we allow multiple logical
> contexts to be used on the same engine (e.g. with virtual engines).
> According to bspec, each logical context requires a unique tag in order
> for context-switching to occur correctly between them. [Simple
> experiments show that it is not so easy to trick the HW into performing
> a lite-restore with matching logical IDs, though my memory of early
> Broadwell experiments does suggest that it should be generating
> lite-restores.]
> 
> We only need to keep a unique tag for the active lifetime of the
> context, and for as long as we need to identify that context. The HW
> uses the tag to determine if it should use a lite-restore (why not the
> LRCA?) and passes the tag back in various status identifiers. The only
> status we need to track is for OA, so when using perf, we assign the
> specific context a unique tag.
> 
> v2: Calculate required number of tags to fill ELSP.
> 
> Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Acked-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 147 ------------------
>   drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
>   .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
>   .../gpu/drm/i915/gem/selftests/mock_context.c |   8 -
>   drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   4 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  32 ++--
>   drivers/gpu/drm/i915/gvt/kvmgt.c              |  17 --
>   drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.c         |   7 +-
>   drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
>   drivers/gpu/drm/i915/i915_perf.c              |  25 ++-
>   drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
>   .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
>   16 files changed, 51 insertions(+), 284 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index e832238a72e5..4e59b809d901 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -168,97 +168,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
>   	return i915_gem_context_get_engine(ctx, idx);
>   }
>   
> -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
> -{
> -	unsigned int max;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	if (INTEL_GEN(i915) >= 12)
> -		max = GEN12_MAX_CONTEXT_HW_ID;
> -	else if (INTEL_GEN(i915) >= 11)
> -		max = GEN11_MAX_CONTEXT_HW_ID;
> -	else if (USES_GUC_SUBMISSION(i915))
> -		/*
> -		 * When using GuC in proxy submission, GuC consumes the
> -		 * highest bit in the context id to indicate proxy submission.
> -		 */
> -		max = MAX_GUC_CONTEXT_HW_ID;
> -	else
> -		max = MAX_CONTEXT_HW_ID;
> -
> -	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
> -}
> -
> -static int steal_hw_id(struct drm_i915_private *i915)
> -{
> -	struct i915_gem_context *ctx, *cn;
> -	LIST_HEAD(pinned);
> -	int id = -ENOSPC;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	list_for_each_entry_safe(ctx, cn,
> -				 &i915->contexts.hw_id_list, hw_id_link) {
> -		if (atomic_read(&ctx->hw_id_pin_count)) {
> -			list_move_tail(&ctx->hw_id_link, &pinned);
> -			continue;
> -		}
> -
> -		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
> -		list_del_init(&ctx->hw_id_link);
> -		id = ctx->hw_id;
> -		break;
> -	}
> -
> -	/*
> -	 * Remember how far we got up on the last repossesion scan, so the
> -	 * list is kept in a "least recently scanned" order.
> -	 */
> -	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
> -	return id;
> -}
> -
> -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
> -{
> -	int ret;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	/*
> -	 * We prefer to steal/stall ourselves and our users over that of the
> -	 * entire system. That may be a little unfair to our users, and
> -	 * even hurt high priority clients. The choice is whether to oomkill
> -	 * something else, or steal a context id.
> -	 */
> -	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> -	if (unlikely(ret < 0)) {
> -		ret = steal_hw_id(i915);
> -		if (ret < 0) /* once again for the correct errno code */
> -			ret = new_hw_id(i915, GFP_KERNEL);
> -		if (ret < 0)
> -			return ret;
> -	}
> -
> -	*out = ret;
> -	return 0;
> -}
> -
> -static void release_hw_id(struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_private *i915 = ctx->i915;
> -
> -	if (list_empty(&ctx->hw_id_link))
> -		return;
> -
> -	mutex_lock(&i915->contexts.mutex);
> -	if (!list_empty(&ctx->hw_id_link)) {
> -		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
> -		list_del_init(&ctx->hw_id_link);
> -	}
> -	mutex_unlock(&i915->contexts.mutex);
> -}
> -
>   static void __free_engines(struct i915_gem_engines *e, unsigned int count)
>   {
>   	while (count--) {
> @@ -313,8 +222,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
> -	release_hw_id(ctx);
> -
>   	free_engines(rcu_access_pointer(ctx->engines));
>   	mutex_destroy(&ctx->engines_mutex);
>   
> @@ -459,12 +366,6 @@ static void context_close(struct i915_gem_context *ctx)
>   
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   
> -	/*
> -	 * This context will never again be assinged to HW, so we can
> -	 * reuse its ID for the next context.
> -	 */
> -	release_hw_id(ctx);
> -
>   	/*
>   	 * The LUT uses the VMA as a backpointer to unref the object,
>   	 * so we need to clear the LUT before we close all the VMA (inside
> @@ -507,7 +408,6 @@ __create_context(struct drm_i915_private *i915)
>   	RCU_INIT_POINTER(ctx->engines, e);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->hw_id_link);
>   
>   	/* NB: Mark all slices as needing a remap so that when the context first
>   	 * loads it will restore whatever remap state already exists. If there
> @@ -676,18 +576,11 @@ struct i915_gem_context *
>   i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   {
>   	struct i915_gem_context *ctx;
> -	int err;
>   
>   	ctx = i915_gem_create_context(i915, 0);
>   	if (IS_ERR(ctx))
>   		return ctx;
>   
> -	err = i915_gem_context_pin_hw_id(ctx);
> -	if (err) {
> -		destroy_kernel_context(&ctx);
> -		return ERR_PTR(err);
> -	}
> -
>   	i915_gem_context_clear_bannable(ctx);
>   	i915_gem_context_set_persistence(ctx);
>   	ctx->sched.priority = I915_USER_PRIORITY(prio);
> @@ -727,15 +620,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>   		DRM_ERROR("Failed to create default global context\n");
>   		return PTR_ERR(ctx);
>   	}
> -	/*
> -	 * For easy recognisability, we want the kernel context to be 0 and then
> -	 * all user contexts will have non-zero hw_id. Kernel contexts are
> -	 * permanently pinned, so that we never suffer a stall and can
> -	 * use them from any allocation context (e.g. for evicting other
> -	 * contexts and from inside the shrinker).
> -	 */
> -	GEM_BUG_ON(ctx->hw_id);
> -	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
>   	dev_priv->kernel_context = ctx;
>   
>   	DRM_DEBUG_DRIVER("%s context support initialized\n",
> @@ -749,10 +633,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
>   	destroy_kernel_context(&i915->kernel_context);
> -
> -	/* Must free all deferred contexts (via flush_workqueue) first */
> -	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
> -	ida_destroy(&i915->contexts.hw_ida);
>   }
>   
>   static int context_idr_cleanup(int id, void *p, void *data)
> @@ -2438,33 +2318,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
>   	return ret;
>   }
>   
> -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_private *i915 = ctx->i915;
> -	int err = 0;
> -
> -	mutex_lock(&i915->contexts.mutex);
> -
> -	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
> -
> -	if (list_empty(&ctx->hw_id_link)) {
> -		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
> -
> -		err = assign_hw_id(i915, &ctx->hw_id);
> -		if (err)
> -			goto out_unlock;
> -
> -		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
> -	}
> -
> -	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
> -	atomic_inc(&ctx->hw_id_pin_count);
> -
> -out_unlock:
> -	mutex_unlock(&i915->contexts.mutex);
> -	return err;
> -}
> -
>   /* GEM context-engines iterator: for_each_gem_engine() */
>   struct intel_context *
>   i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index e0f5b6c6a331..1b0df53436cf 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -127,21 +127,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
>   	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
>   }
>   
> -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
> -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> -{
> -	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
> -		return 0;
> -
> -	return __i915_gem_context_pin_hw_id(ctx);
> -}
> -
> -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
> -{
> -	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
> -	atomic_dec(&ctx->hw_id_pin_count);
> -}
> -
>   static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
>   {
>   	return !ctx->file_priv;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index daf1ea5075a6..6419da7c9f90 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -148,24 +148,6 @@ struct i915_gem_context {
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   #define CONTEXT_USER_ENGINES		3
>   
> -	/**
> -	 * @hw_id: - unique identifier for the context
> -	 *
> -	 * The hardware needs to uniquely identify the context for a few
> -	 * functions like fault reporting, PASID, scheduling. The
> -	 * &drm_i915_private.context_hw_ida is used to assign a unique
> -	 * id for the lifetime of the context.
> -	 *
> -	 * @hw_id_pin_count: - number of times this context had been pinned
> -	 * for use (should be, at most, once per engine).
> -	 *
> -	 * @hw_id_link: - all contexts with an assigned id are tracked
> -	 * for possible repossession.
> -	 */
> -	unsigned int hw_id;
> -	atomic_t hw_id_pin_count;
> -	struct list_head hw_id_link;
> -
>   	struct mutex mutex;
>   
>   	struct i915_sched_attr sched;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index 2288757808ae..2fb31ada2fa7 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       engine->name, ctx->hw_id,
> +				       engine->name,
>   				       yesno(!!ctx->vm), err);
>   				intel_context_put(ce);
>   				kernel_context_close(ctx);
> @@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       engine->name, ctx->hw_id,
> +				       engine->name,
>   				       yesno(!!ctx->vm), err);
>   				intel_context_put(ce);
>   				kernel_context_close(ctx);
> @@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       ce->engine->name, ctx->hw_id,
> -				       yesno(!!ctx->vm), err);
> +				       ce->engine->name, yesno(!!ctx->vm), err);
>   				i915_gem_context_unlock_engines(ctx);
>   				goto out_unlock;
>   			}
> diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> index 0d1a275ec316..ebc46f098561 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
>   {
>   	struct i915_gem_context *ctx;
>   	struct i915_gem_engines *e;
> -	int ret;
>   
>   	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>   	if (!ctx)
> @@ -32,13 +31,8 @@ mock_context(struct drm_i915_private *i915,
>   	RCU_INIT_POINTER(ctx->engines, e);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->hw_id_link);
>   	mutex_init(&ctx->mutex);
>   
> -	ret = i915_gem_context_pin_hw_id(ctx);
> -	if (ret < 0)
> -		goto err_engines;
> -
>   	if (name) {
>   		struct i915_ppgtt *ppgtt;
>   
> @@ -56,8 +50,6 @@ mock_context(struct drm_i915_private *i915,
>   
>   	return ctx;
>   
> -err_engines:
> -	free_engines(rcu_access_pointer(ctx->engines));
>   err_free:
>   	kfree(ctx);
>   	return NULL;
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index bf9cedfccbf0..6959b05ae5f8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -58,6 +58,7 @@ struct intel_context {
>   
>   	u32 *lrc_reg_state;
>   	u64 lrc_desc;
> +	u32 tag; /* cookie passed to HW to track this context on submission */
>   
>   	unsigned int active_count; /* protected by timeline->mutex */
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 25af66540a1b..ad3be2fbd71a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -301,10 +301,12 @@ struct intel_engine_cs {
>   	u8 uabi_class;
>   	u8 uabi_instance;
>   
> +	u32 uabi_capabilities;
>   	u32 context_size;
>   	u32 mmio_base;
>   
> -	u32 uabi_capabilities;
> +	unsigned int context_tag;
> +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
>   
>   	struct rb_node uabi_node;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 6dcceaf02e00..890cd9ab07a0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -443,12 +443,8 @@ assert_priority_queue(const struct i915_request *prev,
>   static u64
>   lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   {
> -	struct i915_gem_context *ctx = ce->gem_context;
>   	u64 desc;
>   
> -	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
> -	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
> -
>   	desc = INTEL_LEGACY_32B_CONTEXT;
>   	if (i915_vm_is_4lvl(ce->vm))
>   		desc = INTEL_LEGACY_64B_CONTEXT;
> @@ -466,20 +462,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   	 * anything below.
>   	 */
>   	if (INTEL_GEN(engine->i915) >= 11) {
> -		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
> -		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
> -								/* bits 37-47 */
> -
>   		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
>   								/* bits 48-53 */
>   
> -		/* TODO: decide what to do with SW counter (bits 55-60) */
> -
>   		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
>   								/* bits 61-63 */
> -	} else {
> -		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
> -		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
>   	}
>   
>   	return desc;
> @@ -988,6 +975,18 @@ __execlists_schedule_in(struct i915_request *rq)
>   
>   	intel_context_get(ce);
>   
> +	if (ce->tag) {
> +		/* Use a fixed tag for OA and friends */
> +		ce->lrc_desc |= (u64)ce->tag << 32;
> +	} else {
> +		/* We don't need a strict matching tag, just different values */
> +		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
> +		ce->lrc_desc |=
> +			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
> +			GEN11_SW_CTX_ID_SHIFT;
> +		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
> +	}
> +
>   	intel_gt_pm_get(engine->gt);
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
>   	intel_engine_context_in(engine);
> @@ -2235,7 +2234,6 @@ static void execlists_context_unpin(struct intel_context *ce)
>   	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
>   		      ce->engine);
>   
> -	i915_gem_context_unpin_hw_id(ce->gem_context);
>   	i915_gem_object_unpin_map(ce->state->obj);
>   	intel_ring_reset(ce->ring, ce->ring->tail);
>   }
> @@ -2285,18 +2283,12 @@ __execlists_context_pin(struct intel_context *ce,
>   		goto unpin_active;
>   	}
>   
> -	ret = i915_gem_context_pin_hw_id(ce->gem_context);
> -	if (ret)
> -		goto unpin_map;
> -
>   	ce->lrc_desc = lrc_descriptor(ce, engine);
>   	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>   	__execlists_update_reg_state(ce, engine);
>   
>   	return 0;
>   
> -unpin_map:
> -	i915_gem_object_unpin_map(ce->state->obj);
>   unpin_active:
>   	intel_context_active_release(ce);
>   err:
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 343d79c1cb7e..04a5a0d90823 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
>   	return sprintf(buf, "\n");
>   }
>   
> -static ssize_t
> -hw_id_show(struct device *dev, struct device_attribute *attr,
> -	   char *buf)
> -{
> -	struct mdev_device *mdev = mdev_from_dev(dev);
> -
> -	if (mdev) {
> -		struct intel_vgpu *vgpu = (struct intel_vgpu *)
> -			mdev_get_drvdata(mdev);
> -		return sprintf(buf, "%u\n",
> -			       vgpu->submission.shadow[0]->gem_context->hw_id);
> -	}
> -	return sprintf(buf, "\n");
> -}
> -
>   static DEVICE_ATTR_RO(vgpu_id);
> -static DEVICE_ATTR_RO(hw_id);
>   
>   static struct attribute *intel_vgpu_attrs[] = {
>   	&dev_attr_vgpu_id.attr,
> -	&dev_attr_hw_id.attr,
>   	NULL
>   };
>   
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 15c7cc3a23b3..454de27c4912 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1507,9 +1507,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   		struct intel_context *ce;
>   
>   		seq_puts(m, "HW context ");
> -		if (!list_empty(&ctx->hw_id_link))
> -			seq_printf(m, "%x [pin %u]", ctx->hw_id,
> -				   atomic_read(&ctx->hw_id_pin_count));
>   		if (ctx->pid) {
>   			struct task_struct *task;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a86d28bda6dd..47239df653f2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
>   				const char *header,
>   				const struct drm_i915_error_context *ctx)
>   {
> -	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
> -		   header, ctx->comm, ctx->pid, ctx->hw_id,
> -		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
> +	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
> +		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
> +		   ctx->guilty, ctx->active);
>   }
>   
>   static void error_print_engine(struct drm_i915_error_state_buf *m,
> @@ -1262,7 +1262,6 @@ static bool record_context(struct drm_i915_error_context *e,
>   		rcu_read_unlock();
>   	}
>   
> -	e->hw_id = ctx->hw_id;
>   	e->sched_attr = ctx->sched;
>   	e->guilty = atomic_read(&ctx->guilty_count);
>   	e->active = atomic_read(&ctx->active_count);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index caaed5093d95..4dc36d6ee3a2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -117,7 +117,6 @@ struct i915_gpu_state {
>   		struct drm_i915_error_context {
>   			char comm[TASK_COMM_LEN];
>   			pid_t pid;
> -			u32 hw_id;
>   			int active;
>   			int guilty;
>   			struct i915_sched_attr sched_attr;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 80055501eccb..ecfbc37b738b 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		} else {
>   			stream->specific_ctx_id_mask =
>   				(1U << GEN8_CTX_ID_WIDTH) - 1;
> -			stream->specific_ctx_id =
> -				upper_32_bits(ce->lrc_desc);
> -			stream->specific_ctx_id &=
> -				stream->specific_ctx_id_mask;
> +			stream->specific_ctx_id = stream->specific_ctx_id_mask;
>   		}
>   		break;
>   
>   	case 11:
>   	case 12: {
>   		stream->specific_ctx_id_mask =
> -			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
> -			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
> -			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
> -		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
> -		stream->specific_ctx_id &=
> -			stream->specific_ctx_id_mask;
> +			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> +		stream->specific_ctx_id = stream->specific_ctx_id_mask;
>   		break;
>   	}
>   
> @@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		MISSING_CASE(INTEL_GEN(i915));
>   	}
>   
> +	ce->tag = stream->specific_ctx_id_mask;
> +
>   	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
>   			 stream->specific_ctx_id,
>   			 stream->specific_ctx_id_mask);
> @@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
>   {
>   	struct intel_context *ce;
>   
> -	stream->specific_ctx_id = INVALID_CTX_ID;
> -	stream->specific_ctx_id_mask = 0;
> -
>   	ce = fetch_and_zero(&stream->pinned_ctx);
> -	if (ce)
> +	if (ce) {
> +		ce->tag = 0; /* recomputed on next submission after parking */
>   		intel_context_unpin(ce);
> +	}
> +
> +	stream->specific_ctx_id = INVALID_CTX_ID;
> +	stream->specific_ctx_id_mask = 0;
>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 24f2944da09d..1f2cf6cfafb5 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
>   			   __entry->flags = flags;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> -		      __entry->flags)
> +		      __entry->ctx, __entry->seqno, __entry->flags)
>   );
>   
>   DECLARE_EVENT_CLASS(i915_request,
> @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno)
> +		      __entry->ctx, __entry->seqno)
>   );
>   
>   DEFINE_EVENT(i915_request, i915_request_add,
> @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
>   			   __entry->port = port;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> +		      __entry->ctx, __entry->seqno,
>   		      __entry->prio, __entry->port)
>   );
>   
> @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
>   			   __entry->completed = i915_request_completed(rq);
>   			   ),
>   
> -		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
> +		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
>   			      __entry->dev, __entry->class, __entry->instance,
> -			      __entry->hw_id, __entry->ctx, __entry->seqno,
> -			      __entry->completed)
> +			      __entry->ctx, __entry->seqno, __entry->completed)
>   );
>   
>   #else
> @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
>   	     */
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
>   			   __entry->flags = flags;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> +		      __entry->ctx, __entry->seqno,
>   		      __entry->flags)
>   );
>   
> @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
>   	TP_STRUCT__entry(
>   			__field(u32, dev)
>   			__field(struct i915_gem_context *, ctx)
> -			__field(u32, hw_id)
>   			__field(struct i915_address_space *, vm)
>   	),
>   
>   	TP_fast_assign(
>   			__entry->dev = ctx->i915->drm.primary->index;
>   			__entry->ctx = ctx;
> -			__entry->hw_id = ctx->hw_id;
>   			__entry->vm = ctx->vm;
>   	),
>   
> -	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
> -		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
> +	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
> +		  __entry->dev, __entry->ctx, __entry->vm)
>   )
>   
>   DEFINE_EVENT(i915_context, i915_context_create,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 52d2df843148..f39f0282e78c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg)
>   			if (IS_ERR(rq)) {
>   				/* When full, fail_if_busy will trigger EBUSY */
>   				if (PTR_ERR(rq) != -EBUSY) {
> -					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
> -					       ctx->hw_id, engine->name,
> +					pr_err("Unexpected error from request alloc (on %s): %d\n",
> +					       engine->name,
>   					       (int)PTR_ERR(rq));
>   					err = PTR_ERR(rq);
>   				}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index 1c9db08f7c28..ac1ff558eb90 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
>   		}
>   
>   		nc = 0;
> -		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
> +		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
>   			for (; nc < num_ctx; nc++) {
>   				ctx = mock_context(i915, "mock");
>   				if (!ctx)
> 

I don't understand the OA part, but for the rest:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (31 preceding siblings ...)
  2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-10-02 20:16 ` Patchwork
  2019-10-02 20:29 ` ✗ Fi.CI.SPARSE: " Patchwork
  2019-10-02 20:41 ` ✗ Fi.CI.BAT: failure " Patchwork
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 20:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
5ab21c1fa058 drm/i915/selftests: Exercise potential false lite-restore
0322b52184d0 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked with dma_fence_signal")'
#14: 
See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked

total: 1 errors, 0 warnings, 0 checks, 24 lines checked
1a05d810f79f drm/mm: Pack allocated/scanned boolean into a bitfield
931f20d41378 drm/i915: Only track bound elements of the GTT
2a6d4f282fa6 drm/i915: Mark up address spaces that may need to allocate
be8578d54763 drm/i915: Pull i915_vma_pin under the vm->mutex
4404006876ec drm/i915: Push the i915_active.retire into a worker
81d85386f107 drm/i915: Coordinate i915_active with its own mutex
-:1340: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#1340: FILE: drivers/gpu/drm/i915/i915_active_types.h:49:
+	struct mutex mutex;

total: 0 errors, 0 warnings, 1 checks, 1481 lines checked
f2e5937f521c drm/i915: Move idle barrier cleanup into engine-pm
a34a2a0ff4b7 drm/i915: Drop struct_mutex from around i915_retire_requests()
34cf8af39814 drm/i915: Remove the GEM idle worker
4bc0f23154b4 drm/i915: Merge wait_for_timelines with retire_request
ee8a183e05ce drm/i915/gem: Retire directly for mmap-offset shrinking
a23c0260a779 drm/i915: Move request runtime management onto gt
-:236: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#236: 
new file mode 100644

-:241: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#241: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:1:
+/*

-:242: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#242: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:2:
+ * SPDX-License-Identifier: MIT

-:370: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#370: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:1:
+/*

-:371: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#371: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 725 lines checked
f210329ac8c8 drm/i915: Move global activity tracking from GEM to GT
f452d4758807 drm/i915: Expose engine properties via sysfs
-:68: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#68: 
new file mode 100644

-:73: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#73: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:1:
+/*

-:74: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#74: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:2:
+ * SPDX-License-Identifier: MIT

-:198: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#198: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:1:
+/*

-:199: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#199: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 158 lines checked
81490be650d7 drm/i915/execlists: Force preemption
bc5ab3030d38 drm/i915: Mark up "sentinel" requests
258d11346b6a drm/i915/execlists: Cancel banned contexts on schedule-out
b6e6bd3f3585 drm/i915: Cancel non-persistent contexts on close
-:38: WARNING:BAD_SIGN_OFF: Duplicate signature
#38: 
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>

-:299: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#299: 
new file mode 100644

-:304: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#304: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:1:
+/*

-:305: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#305: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:2:
+ * SPDX-License-Identifier: MIT

-:366: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#366: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:1:
+/*

-:367: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#367: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 6 warnings, 0 checks, 332 lines checked
128a9bd55770 drm/i915: Replace hangcheck by heartbeats
-:236: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#236: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:69:
+					  "%s heartbeat not ticking\n",

-:253: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#253: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:86:
+					      "stopped heartbeat on %s",

-:540: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#540: 
deleted file mode 100644

total: 0 errors, 3 warnings, 0 checks, 672 lines checked
42bd5709ec17 drm/i915: Remove logical HW ID
10e4db753ab1 drm/i915: Move context management under GEM
-:1061: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#1061: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:988:
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
                                   ^

total: 0 errors, 0 warnings, 1 checks, 1717 lines checked
1e0c926a87c1 drm/i915/overlay: Drop struct_mutex guard
de0dcfa1655b drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
711644d1d77a drm/i915: Remove struct_mutex guard for debugfs/opregion
024590a3e7fd drm/i915: Drop struct_mutex from suspend state save/restore
066f93946d1d drm/i915/selftests: Drop vestigal struct_mutex guards
76f85559693d drm/i915: Drop struct_mutex from around GEM initialisation
56d46847a471 drm/i915: Flush idle barriers when waiting


* ✗ Fi.CI.SPARSE: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (32 preceding siblings ...)
  2019-10-02 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2) Patchwork
@ 2019-10-02 20:29 ` Patchwork
  2019-10-02 20:41 ` ✗ Fi.CI.BAT: failure " Patchwork
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 20:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.6.0
Commit: drm/i915/selftests: Exercise potential false lite-restore
Okay!

Commit: dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
Okay!

Commit: drm/mm: Pack allocated/scanned boolean into a bitfield
Okay!

Commit: drm/i915: Only track bound elements of the GTT
Okay!

Commit: drm/i915: Mark up address spaces that may need to allocate
Okay!

Commit: drm/i915: Pull i915_vma_pin under the vm->mutex
Okay!

Commit: drm/i915: Push the i915_active.retire into a worker
Okay!

Commit: drm/i915: Coordinate i915_active with its own mutex
Okay!

Commit: drm/i915: Move idle barrier cleanup into engine-pm
Okay!

Commit: drm/i915: Drop struct_mutex from around i915_retire_requests()
Okay!

Commit: drm/i915: Remove the GEM idle worker
Okay!

Commit: drm/i915: Merge wait_for_timelines with retire_request
Okay!

Commit: drm/i915/gem: Retire directly for mmap-offset shrinking
Okay!

Commit: drm/i915: Move request runtime management onto gt
Okay!

Commit: drm/i915: Move global activity tracking from GEM to GT
Okay!

Commit: drm/i915: Expose engine properties via sysfs
Okay!

Commit: drm/i915/execlists: Force preemption
Okay!

Commit: drm/i915: Mark up "sentinel" requests
Okay!

Commit: drm/i915/execlists: Cancel banned contexts on schedule-out
Okay!

Commit: drm/i915: Cancel non-persistent contexts on close
Okay!

Commit: drm/i915: Replace hangcheck by heartbeats
Okay!

Commit: drm/i915: Remove logical HW ID
Okay!

Commit: drm/i915: Move context management under GEM
Okay!

Commit: drm/i915/overlay: Drop struct_mutex guard
Okay!

Commit: drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
Okay!


* ✗ Fi.CI.BAT: failure for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (33 preceding siblings ...)
  2019-10-02 20:29 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-10-02 20:41 ` Patchwork
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 20:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
URL   : https://patchwork.freedesktop.org/series/67483/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6992 -> Patchwork_14633
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14633 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14633, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14633:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-bdw-gvtdvm:      [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-whl-u:           [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-whl-u/igt@i915_selftest@live_execlists.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-whl-u/igt@i915_selftest@live_execlists.html
    - fi-kbl-7500u:       NOTRUN -> [INCOMPLETE][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-kbl-7500u/igt@i915_selftest@live_execlists.html
    - fi-bsw-n3050:       [PASS][6] -> [INCOMPLETE][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
    - fi-bdw-5557u:       [PASS][8] -> [INCOMPLETE][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-5557u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - fi-bdw-gvtdvm:      NOTRUN -> [FAIL][10]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-gvtdvm/igt@runner@aborted.html
    - fi-cfl-8109u:       NOTRUN -> [FAIL][11]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-cfl-8109u/igt@runner@aborted.html
    - fi-kbl-7500u:       NOTRUN -> [FAIL][12]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-kbl-7500u/igt@runner@aborted.html
    - fi-whl-u:           NOTRUN -> [FAIL][13]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-whl-u/igt@runner@aborted.html
    - fi-bxt-dsi:         NOTRUN -> [FAIL][14]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bxt-dsi/igt@runner@aborted.html
    - fi-bsw-n3050:       NOTRUN -> [FAIL][15]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bsw-n3050/igt@runner@aborted.html
    - fi-bdw-5557u:       NOTRUN -> [FAIL][16]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-5557u/igt@runner@aborted.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@runner@aborted:
    - {fi-cml-s}:         NOTRUN -> [FAIL][17]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-cml-s/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_14633 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_reloc@basic-write-read-noreloc:
    - fi-icl-u3:          [PASS][18] -> [DMESG-WARN][19] ([fdo#107724]) +2 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u3/igt@gem_exec_reloc@basic-write-read-noreloc.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u3/igt@gem_exec_reloc@basic-write-read-noreloc.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][20] -> [INCOMPLETE][21] ([fdo#107718])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-u2:          [PASS][22] -> [INCOMPLETE][23] ([fdo#107713])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u2/igt@i915_selftest@live_execlists.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u2/igt@i915_selftest@live_execlists.html
    - fi-icl-u3:          [PASS][24] -> [INCOMPLETE][25] ([fdo#107713])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u3/igt@i915_selftest@live_execlists.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u3/igt@i915_selftest@live_execlists.html
    - fi-bxt-dsi:         [PASS][26] -> [INCOMPLETE][27] ([fdo#103927])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bxt-dsi/igt@i915_selftest@live_execlists.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bxt-dsi/igt@i915_selftest@live_execlists.html
    - fi-cfl-8109u:       [PASS][28] -> [INCOMPLETE][29] ([fdo#110977])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-cfl-8109u/igt@i915_selftest@live_execlists.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-cfl-8109u/igt@i915_selftest@live_execlists.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-apl-guc:         [DMESG-WARN][30] ([fdo#109385] / [fdo#111652]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-apl-guc/igt@gem_exec_gttfill@basic.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-apl-guc/igt@gem_exec_gttfill@basic.html

  * igt@gem_mmap_gtt@basic:
    - fi-icl-u3:          [DMESG-WARN][32] ([fdo#107724]) -> [PASS][33] +1 similar issue
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u3/igt@gem_mmap_gtt@basic.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u3/igt@gem_mmap_gtt@basic.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109385]: https://bugs.freedesktop.org/show_bug.cgi?id=109385
  [fdo#110566]: https://bugs.freedesktop.org/show_bug.cgi?id=110566
  [fdo#110977]: https://bugs.freedesktop.org/show_bug.cgi?id=110977
  [fdo#111652]: https://bugs.freedesktop.org/show_bug.cgi?id=111652


Participating hosts (51 -> 44)
------------------------------

  Additional (1): fi-kbl-7500u 
  Missing    (8): fi-ilk-m540 fi-tgl-u fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6992 -> Patchwork_14633

  CI-20190529: 20190529
  CI_DRM_6992: 7116520f059e12d50596bdbe73b48ba8e8d50daf @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5210: 74f55119f9920b65996535210a09147997804136 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14633: 56d46847a471aeebbd34dd8d7dacf5847d312a5d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

56d46847a471 drm/i915: Flush idle barriers when waiting
76f85559693d drm/i915: Drop struct_mutex from around GEM initialisation
066f93946d1d drm/i915/selftests: Drop vestigal struct_mutex guards
024590a3e7fd drm/i915: Drop struct_mutex from suspend state save/restore
711644d1d77a drm/i915: Remove struct_mutex guard for debugfs/opregion
de0dcfa1655b drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
1e0c926a87c1 drm/i915/overlay: Drop struct_mutex guard
10e4db753ab1 drm/i915: Move context management under GEM
42bd5709ec17 drm/i915: Remove logical HW ID
128a9bd55770 drm/i915: Replace hangcheck by heartbeats
b6e6bd3f3585 drm/i915: Cancel non-persistent contexts on close
258d11346b6a drm/i915/execlists: Cancel banned contexts on schedule-out
bc5ab3030d38 drm/i915: Mark up "sentinel" requests
81490be650d7 drm/i915/execlists: Force preemption
f452d4758807 drm/i915: Expose engine properties via sysfs
f210329ac8c8 drm/i915: Move global activity tracking from GEM to GT
a23c0260a779 drm/i915: Move request runtime management onto gt
ee8a183e05ce drm/i915/gem: Retire directly for mmap-offset shrinking
4bc0f23154b4 drm/i915: Merge wait_for_timelines with retire_request
34cf8af39814 drm/i915: Remove the GEM idle worker
a34a2a0ff4b7 drm/i915: Drop struct_mutex from around i915_retire_requests()
f2e5937f521c drm/i915: Move idle barrier cleanup into engine-pm
81d85386f107 drm/i915: Coordinate i915_active with its own mutex
4404006876ec drm/i915: Push the i915_active.retire into a worker
be8578d54763 drm/i915: Pull i915_vma_pin under the vm->mutex
2a6d4f282fa6 drm/i915: Mark up address spaces that may need to allocate
931f20d41378 drm/i915: Only track bound elements of the GTT
1a05d810f79f drm/mm: Pack allocated/scanned boolean into a bitfield
0322b52184d0 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
5ab21c1fa058 drm/i915/selftests: Exercise potential false lite-restore

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/index.html

Thread overview: 50+ messages
2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
2019-10-02 11:19 ` [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
2019-10-02 11:19 ` [PATCH 04/30] drm/i915: Only track bound elements of the GTT Chris Wilson
2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
2019-10-02 15:47   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
2019-10-02 11:19 ` [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker Chris Wilson
2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
2019-10-02 15:49   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
2019-10-02 11:19 ` [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
2019-10-02 11:19 ` [PATCH 11/30] drm/i915: Remove the GEM idle worker Chris Wilson
2019-10-02 11:19 ` [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
2019-10-02 15:53   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
2019-10-02 15:58   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT Chris Wilson
2019-10-02 11:19 ` [PATCH 16/30] drm/i915: Expose engine properties via sysfs Chris Wilson
2019-10-02 11:19 ` [PATCH 17/30] drm/i915/execlists: Force preemption Chris Wilson
2019-10-02 11:19 ` [PATCH 18/30] drm/i915: Mark up "sentinel" requests Chris Wilson
2019-10-02 11:19 ` [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
2019-10-02 13:52   ` Bloomfield, Jon
2019-10-02 14:05     ` Chris Wilson
2019-10-02 14:23     ` Chris Wilson
2019-10-02 14:40       ` Bloomfield, Jon
2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
2019-10-02 14:36     ` Bloomfield, Jon
2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
2019-10-02 13:55   ` Bloomfield, Jon
2019-10-02 14:28     ` Chris Wilson
2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
2019-10-02 16:07   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 23/30] drm/i915: Move context management under GEM Chris Wilson
2019-10-02 11:19 ` [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard Chris Wilson
2019-10-02 11:19 ` [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info Chris Wilson
2019-10-02 11:19 ` [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion Chris Wilson
2019-10-02 11:19 ` [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore Chris Wilson
2019-10-02 11:19 ` [PATCH 28/30] drm/i915/selftests: Drop vestigal struct_mutex guards Chris Wilson
2019-10-02 11:19 ` [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation Chris Wilson
2019-10-02 11:20 ` [PATCH 30/30] drm/i915: Flush idle barriers when waiting Chris Wilson
2019-10-02 13:21 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore Patchwork
2019-10-02 13:34 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-10-02 13:49   ` Chris Wilson
2019-10-02 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2) Patchwork
2019-10-02 20:29 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-10-02 20:41 ` ✗ Fi.CI.BAT: failure " Patchwork
