* Endless busyness, the forecoming
@ 2019-06-12  9:31 Chris Wilson
  2019-06-12  9:31 ` [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
                   ` (15 more replies)
  0 siblings, 16 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

Please kindly review. Same old patches.
-Chris



* [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-12 13:29   ` Mika Kuoppala
  2019-06-12 14:26   ` [PATCH v2] " Chris Wilson
  2019-06-12  9:31 ` [PATCH 2/8] drm/i915: Stop retiring along engine Chris Wilson
                   ` (14 subsequent siblings)
  15 siblings, 2 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the request after
it is complete. Currently we rely on knowing the order in which requests
execute on each engine, so to remove that presumption we need to
identify a request/context-switch that we know must occur after our
completion. Any request queued after the signal must imply a context
switch; for simplicity we use a fresh request from the kernel context.
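
The scheme, as a stand-alone sketch (user-space C with illustrative
names, not the i915 API): an unpin only queues a barrier on the engine;
the next kernel-context request adopts the pending barriers, and the
real release runs when that request retires, i.e. only after the
context switch that guarantees the GPU has stopped writing to the old
context image.

#include <stdio.h>
#include <stdlib.h>

struct barrier {
	struct barrier *next;
	void (*fn)(void *data);
	void *data;
};

struct engine {
	struct barrier *barriers;	/* pending idle barriers */
};

struct request {
	struct barrier *adopted;	/* barriers to run on retirement */
};

struct context {
	const char *name;
	int pinned;
};

static void context_release(void *data)
{
	struct context *ce = data;

	ce->pinned = 0;
	printf("%s: context image finally unpinned\n", ce->name);
}

/* Unpin defers the release until after the next kernel context switch. */
static void context_unpin(struct engine *engine, struct context *ce)
{
	struct barrier *b = malloc(sizeof(*b));

	if (!b)
		return;

	b->fn = context_release;
	b->data = ce;
	b->next = engine->barriers;
	engine->barriers = b;
	printf("%s: unpin requested, barrier queued\n", ce->name);
}

/* The next kernel-context request adopts all pending barriers. */
static void request_add_barriers(struct engine *engine, struct request *rq)
{
	rq->adopted = engine->barriers;
	engine->barriers = NULL;
}

/* Retirement of that request implies the context switch completed. */
static void request_retire(struct request *rq)
{
	struct barrier *b = rq->adopted;

	while (b) {
		struct barrier *next = b->next;

		b->fn(b->data);
		free(b);
		b = next;
	}
	rq->adopted = NULL;
}

int main(void)
{
	struct engine engine = { NULL };
	struct context ce = { "ctx0", 1 };
	struct request rq = { NULL };

	context_unpin(&engine, &ce);	/* GPU may still write the image */
	request_add_barriers(&engine, &rq); /* fresh kernel-context request */
	request_retire(&rq);		/* switch done: safe to release */
	return 0;
}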

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 20 ++++-
 drivers/gpu/drm/i915/gt/intel_context.c       | 80 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
 drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
 drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
 drivers/gpu/drm/i915/i915_active.c            | 80 ++++++++++++++++++-
 drivers/gpu/drm/i915/i915_active.h            |  5 ++
 drivers/gpu/drm/i915/i915_active_types.h      |  3 +
 drivers/gpu/drm/i915/i915_gem.c               |  4 -
 drivers/gpu/drm/i915/i915_request.c           | 15 ----
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
 19 files changed, 214 insertions(+), 185 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index c86ca9f21532..6200060aef05 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id)
-		intel_engine_lost_context(engine);
-}
-
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	if (ret)
 		goto out_add;
 
-	ret = gen8_emit_rpcs_config(rq, ce, sseu);
-	if (ret)
-		goto out_add;
-
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
 	 * modifying request is retired by setting the ce activity tracker.
@@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * But we only need to take one pin on the account of it. Or in other
 	 * words transfer the pinned ce object to tracked active request.
 	 */
-	if (!i915_active_request_isset(&ce->active_tracker))
-		__intel_context_pin(ce);
-	__i915_active_request_set(&ce->active_tracker, rq);
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (ret)
+		goto out_add;
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 out_add:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 630392c77e48..9691dd062f72 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index f40f13c0b8b7..59b6d45b1936 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -10,6 +10,22 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct i915_active_request *active =
+			container_of((struct list_head *)node,
+				     typeof(*active), link);
+
+		INIT_LIST_HEAD(&active->link);
+		RCU_INIT_POINTER(active->request, NULL);
+
+		active->retire(active, NULL);
+	}
+}
+
 static void i915_gem_park(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
+	for_each_engine(engine, i915, id) {
+		call_idle_barriers(engine); /* cleanup after wedging */
 		i915_gem_batch_pool_fini(&engine->batch_pool);
+	}
 
 	i915_timelines_park(i915);
 	i915_vma_parked(i915);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index c78ec0b58e77..c10eb4904264 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 
 		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
 
-		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
 
@@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
 		ce->ops->unpin(ce);
 
 		i915_gem_context_put(ce->gem_context);
-		intel_context_put(ce);
+		intel_context_inactive(ce);
 	}
 
 	mutex_unlock(&ce->pin_mutex);
 	intel_context_put(ce);
 }
 
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
+static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
 {
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
+	int err;
 
-	intel_context_unpin(ce);
+	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	if (err)
+		return err;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	vma->obj->pin_global++;
+	vma->obj->mm.dirty = true;
+
+	return 0;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+	vma->obj->pin_global--;
+	__i915_vma_unpin(vma);
+}
+
+static void intel_context_retire(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_context_put(ce);
 }
 
 void
@@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
+	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+}
+
+int intel_context_active(struct intel_context *ce, unsigned long flags)
+{
+	int err;
+
+	if (!i915_active_acquire(&ce->active))
+		return 0;
+
+	intel_context_get(ce);
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state, flags);
+	if (err) {
+		i915_active_cancel(&ce->active);
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Preallocate tracking nodes */
+	if (!i915_gem_context_is_kernel(ce->gem_context)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void intel_context_inactive(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
 }
 
 static void i915_global_context_shrink(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 6d5453ba2c1e..4de4ba2df7d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
+int intel_context_active(struct intel_context *ce, unsigned long flags);
+void intel_context_inactive(struct intel_context *ce);
+
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 825fcf0ac9c4..e95be4be9612 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -56,10 +56,10 @@ struct intel_context {
 	intel_engine_mask_t saturated; /* submitting semaphores too late? */
 
 	/**
-	 * active_tracker: Active tracker for the external rq activity
-	 * on this intel_context object.
+	 * active: Active tracker for the rq activity (inc. external) on this
+	 * intel_context object.
 	 */
-	struct i915_active_request active_tracker;
+	struct i915_active active;
 
 	const struct intel_context_ops *ops;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 201bbd2a4faf..b9fd88f21609 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c0d986db5a75..5a08036ae774 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
+	init_llist_head(&engine->barrier_tasks);
+
 	err = init_status_page(engine);
 	if (err)
 		return err;
@@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->preempt_context)
 		intel_context_unpin(engine->preempt_context);
 	intel_context_unpin(engine->kernel_context);
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
 	i915_timeline_fini(&engine->timeline);
 
@@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-}
-
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ccf034764741..3c448a061abd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
+
+	i915_request_add_barriers(rq);
 	__i915_request_commit(rq);
 
 	return false;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 01223864237a..33a31aa2d2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -11,6 +11,7 @@
 #include <linux/irq_work.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/types.h>
 
 #include "i915_gem.h"
@@ -288,6 +289,7 @@ struct intel_engine_cs {
 	struct intel_ring *buffer;
 
 	struct i915_timeline timeline;
+	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
 	struct intel_context *preempt_context; /* pinned; optional */
@@ -435,17 +437,6 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
-	/* Contexts are pinned whilst they are active on the GPU. The last
-	 * context executed remains active whilst the GPU is idle - the
-	 * switch away and write to the context object only occurs on the
-	 * next execution.  Contexts are only unpinned on retirement of the
-	 * following request ensuring that we can always write to the object
-	 * on the context switch even after idling. Across suspend, we switch
-	 * to the kernel context and trash it as the save may not happen
-	 * before the hardware is powered down.
-	 */
-	struct intel_context *last_retired_context;
-
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b8f5592da18f..05524489615c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
 	intel_context_free(ce);
 }
 
-static int __context_pin(struct i915_vma *vma)
-{
-	unsigned int flags;
-	int err;
-
-	flags = PIN_GLOBAL | PIN_HIGH;
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	err = i915_vma_pin(vma, 0, 0, flags);
-	if (err)
-		return err;
-
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct i915_vma *vma)
-{
-	vma->obj->pin_global--;
-	__i915_vma_unpin(vma);
-}
-
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	struct intel_engine_cs *engine;
-
-	/*
-	 * The tasklet may still be using a pointer to our state, via an
-	 * old request. However, since we know we only unpin the context
-	 * on retirement of the following request, we know that the last
-	 * request referencing us will have had a completion CS interrupt.
-	 * If we see that it is still active, it means that the tasklet hasn't
-	 * had the chance to run yet; let it run before we teardown the
-	 * reference it may use.
-	 */
-	engine = READ_ONCE(ce->inflight);
-	if (unlikely(engine)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		process_csb(engine);
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		GEM_BUG_ON(READ_ONCE(ce->inflight));
-	}
-
 	i915_gem_context_unpin_hw_id(ce->gem_context);
-
-	intel_ring_unpin(ce->ring);
-
 	i915_gem_object_unpin_map(ce->state->obj);
-	__context_unpin(ce->state);
+	intel_ring_unpin(ce->ring);
 }
 
 static void
@@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce,
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = __context_pin(ce->state);
+	ret = intel_context_active(ce,
+				   engine->i915->ggtt.pin_bias |
+				   PIN_OFFSET_BIAS |
+				   PIN_HIGH);
 	if (ret)
 		goto err;
 
@@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
 					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto unpin_vma;
+		goto unpin_active;
 	}
 
 	ret = intel_ring_pin(ce->ring);
@@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce,
 	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
-	__context_unpin(ce->state);
+unpin_active:
+	intel_context_inactive(ce);
 err:
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index c834d016c965..7ab28b6f62a1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
 		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
 }
 
-static int __context_pin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-	int err;
-
-	vma = ce->state;
-	if (!vma)
-		return 0;
-
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (err)
-		return err;
-
-	/*
-	 * And mark is as a globally pinned object to let the shrinker know
-	 * it cannot reclaim the object until we release it.
-	 */
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-
-	vma = ce->state;
-	if (!vma)
-		return;
-
-	vma->obj->pin_global--;
-	i915_vma_unpin(vma);
-}
-
 static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
-	__context_unpin(ce);
 }
 
 static struct i915_vma *
@@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce)
 		ce->state = vma;
 	}
 
-	err = __context_pin(ce);
+	err = intel_context_active(ce, PIN_HIGH);
 	if (err)
 		return err;
 
 	err = __context_pin_ppgtt(ce->gem_context);
 	if (err)
-		goto err_unpin;
+		goto err_active;
 
 	return 0;
 
-err_unpin:
-	__context_unpin(ce);
+err_active:
+	intel_context_inactive(ce);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 6d7562769eb2..b7675ef18523 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
 
 static int mock_context_pin(struct intel_context *ce)
 {
+	int ret;
+
 	if (!ce->ring) {
 		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
 			return -ENOMEM;
 	}
 
+	ret = intel_context_active(ce, PIN_HIGH);
+	if (ret)
+		return ret;
+
 	mock_timeline_pin(ce->ring->timeline);
 	return 0;
 }
@@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 863ae12707ba..100e40afc9d6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -100,7 +100,7 @@ active_instance(struct i915_active *ref, u64 idx)
 		parent = *p;
 
 		node = rb_entry(parent, struct active_node, node);
-		if (node->timeline == idx)
+		if (node->timeline == idx && !IS_ERR(node->base.request))
 			goto replace;
 
 		if (node->timeline < idx)
@@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
 	i915_active_request_init(&ref->last, NULL, last_retire);
+	init_llist_head(&ref->barriers);
 	ref->count = 0;
 }
 
@@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	unsigned long tmp;
+	int err = 0;
+
+	GEM_BUG_ON(!engine->mask);
+	for_each_engine_masked(engine, i915, engine->mask, tmp) {
+		struct intel_context *kctx = engine->kernel_context;
+		struct active_node *node;
+
+		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+		if (unlikely(!node)) {
+			err = -ENOMEM;
+			break;
+		}
+
+		i915_active_request_init(&node->base,
+					 (void *)engine, node_retire);
+		node->timeline = kctx->ring->timeline->fence_context;
+		node->ref = ref;
+		ref->count++;
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &ref->barriers);
+	}
+
+	return err;
+}
+
+void i915_active_acquire_barrier(struct i915_active *ref)
+{
+	struct llist_node *pos, *next;
+
+	i915_active_acquire(ref);
+
+	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
+		struct intel_engine_cs *engine;
+		struct active_node *node;
+		struct rb_node **p, *parent;
+
+		node = container_of((struct list_head *)pos,
+				    typeof(*node), base.link);
+
+		engine = (void *)rcu_access_pointer(node->base.request);
+		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+
+		parent = NULL;
+		p = &ref->tree.rb_node;
+		while (*p) {
+			parent = *p;
+			if (rb_entry(parent,
+				     struct active_node,
+				     node)->timeline < node->timeline)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&node->node, parent, p);
+		rb_insert_color(&node->node, &ref->tree);
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &engine->barrier_tasks);
+	}
+	i915_active_release(ref);
+}
+
+void i915_request_add_barriers(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+		list_add_tail((struct list_head *)node, &rq->active_list);
+}
+
 int i915_active_request_set(struct i915_active_request *active,
 			    struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7d758719ce39..d55d37673944 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
 static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine);
+void i915_active_acquire_barrier(struct i915_active *ref);
+void i915_request_add_barriers(struct i915_request *rq);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index b679253b53a5..c025991b9233 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,6 +7,7 @@
 #ifndef _I915_ACTIVE_TYPES_H_
 #define _I915_ACTIVE_TYPES_H_
 
+#include <linux/llist.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 
@@ -31,6 +32,8 @@ struct i915_active {
 	unsigned int count;
 
 	void (*retire)(struct i915_active *ref);
+
+	struct llist_head barriers;
 };
 
 #endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e980c1ee3dcf..0663f2df65d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_contexts_lost(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e9b59eea4f10..9eff9de7fa10 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	spin_unlock(&rq->lock);
 
 	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	intel_context_mark_active(ce);
 	return rq;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index b7f3fbb4ae89..a96d0c012d46 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	flush_work(&i915->gem.idle_work);
-- 
2.20.1


* [PATCH 2/8] drm/i915: Stop retiring along engine
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
  2019-06-12  9:31 ` [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-14 14:23   ` Mika Kuoppala
  2019-06-12  9:31 ` [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list Chris Wilson
                   ` (13 subsequent siblings)
  15 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

We no longer track the execution order along the engine and so no longer
need to enforce the ordering of retirement along the engine.
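
The resulting retirement rule, as a stand-alone sketch (illustrative
names, not the driver code): each ring retires its own requests in
submission order and simply stops at the first one that has not
completed; there is no second, engine-wide retirement pass.

#include <stdbool.h>
#include <stdio.h>

struct request {
	unsigned int seqno;
	bool completed;
	bool retired;
};

/* Returns false once an incomplete request is reached. */
static bool request_retire(struct request *rq)
{
	if (!rq->completed)
		return false;

	rq->retired = true;
	printf("retired %u\n", rq->seqno);
	return true;
}

/* Walk the ring in submission order; later requests wait their turn. */
static void ring_retire_requests(struct request *ring, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		if (!request_retire(&ring[i]))
			break;
}

int main(void)
{
	struct request ring[] = {
		{ 1, true,  false },
		{ 2, true,  false },
		{ 3, false, false },	/* still busy: blocks retirement here */
	};

	ring_retire_requests(ring, 3);	/* retires 1 and 2 only */
	return 0;
}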

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 128 +++++++++++-----------------
 1 file changed, 52 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9eff9de7fa10..9c58ae6e4afb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -183,72 +183,23 @@ static void free_capture_list(struct i915_request *request)
 	}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-				    struct i915_request *rq)
-{
-	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
-
-	GEM_BUG_ON(!i915_request_completed(rq));
-
-	local_irq_disable();
-
-	spin_lock(&engine->timeline.lock);
-	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-	list_del_init(&rq->link);
-	spin_unlock(&engine->timeline.lock);
-
-	spin_lock(&rq->lock);
-	i915_request_mark_complete(rq);
-	if (!i915_request_signaled(rq))
-		dma_fence_signal_locked(&rq->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-		i915_request_cancel_breadcrumb(rq);
-	if (rq->waitboost) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
-}
-
-static void __retire_engine_upto(struct intel_engine_cs *engine,
-				 struct i915_request *rq)
-{
-	struct i915_request *tmp;
-
-	if (list_empty(&rq->link))
-		return;
-
-	do {
-		tmp = list_first_entry(&engine->timeline.requests,
-				       typeof(*tmp), link);
-
-		GEM_BUG_ON(tmp->engine != engine);
-		__retire_engine_request(engine, tmp);
-	} while (tmp != rq);
-}
-
-static void i915_request_retire(struct i915_request *request)
+static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	if (!i915_request_completed(rq))
+		return false;
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_request_completed(request));
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  hwsp_seqno(rq));
 
-	trace_i915_request_retire(request);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+	trace_i915_request_retire(rq);
 
-	advance_ring(request);
-	free_capture_list(request);
+	advance_ring(rq);
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -260,7 +211,7 @@ static void i915_request_retire(struct i915_request *request)
 	 * pass along the auxiliary information (to avoid dereferencing
 	 * the node after the callback).
 	 */
-	list_for_each_entry_safe(active, next, &request->active_list, link) {
+	list_for_each_entry_safe(active, next, &rq->active_list, link) {
 		/*
 		 * In microbenchmarks or focusing upon time inside the kernel,
 		 * we may spend an inordinate amount of time simply handling
@@ -276,18 +227,39 @@ static void i915_request_retire(struct i915_request *request)
 		INIT_LIST_HEAD(&active->link);
 		RCU_INIT_POINTER(active->request, NULL);
 
-		active->retire(active, request);
+		active->retire(active, rq);
+	}
+
+	local_irq_disable();
+
+	spin_lock(&rq->engine->timeline.lock);
+	list_del(&rq->link);
+	spin_unlock(&rq->engine->timeline.lock);
+
+	spin_lock(&rq->lock);
+	i915_request_mark_complete(rq);
+	if (!i915_request_signaled(rq))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		i915_request_cancel_breadcrumb(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
 	}
+	spin_unlock(&rq->lock);
+
+	local_irq_enable();
 
-	i915_request_remove_from_client(request);
+	intel_context_exit(rq->hw_context);
+	intel_context_unpin(rq->hw_context);
 
-	__retire_engine_upto(request->engine, request);
+	i915_request_remove_from_client(rq);
 
-	intel_context_exit(request->hw_context);
-	intel_context_unpin(request->hw_context);
+	free_capture_list(rq);
+	i915_sched_node_fini(&rq->sched);
+	i915_request_put(rq);
 
-	i915_sched_node_fini(&request->sched);
-	i915_request_put(request);
+	return true;
 }
 
 void i915_request_retire_upto(struct i915_request *rq)
@@ -309,9 +281,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	do {
 		tmp = list_first_entry(&ring->request_list,
 				       typeof(*tmp), ring_link);
-
-		i915_request_retire(tmp);
-	} while (tmp != rq);
+	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
 static void irq_execute_cb(struct irq_work *wrk)
@@ -600,12 +570,9 @@ static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) {
-		if (!i915_request_completed(rq))
+	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+		if (!i915_request_retire(rq))
 			break;
-
-		i915_request_retire(rq);
-	}
 }
 
 static noinline struct i915_request *
@@ -620,6 +587,15 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	i915_request_retire(rq);
+
+	rq = kmem_cache_alloc(global.slab_requests,
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (rq)
+		return rq;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
-- 
2.20.1


* [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
  2019-06-12  9:31 ` [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
  2019-06-12  9:31 ` [PATCH 2/8] drm/i915: Stop retiring along engine Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-14 14:34   ` Mika Kuoppala
  2019-06-14 15:50   ` Mika Kuoppala
  2019-06-12  9:31 ` [PATCH 4/8] drm/i915: Flush the execution-callbacks on retiring Chris Wilson
                   ` (12 subsequent siblings)
  15 siblings, 2 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

To continue the onslaught of removing the assumption of a global
execution ordering, another casualty is the engine->timeline. Without an
actual timeline to track, it is overkill and we can replace it with a
much less grand plain list. We still need a list of in-flight requests,
for the simple purpose of finding them (for retiring, resetting,
preemption, etc.).
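
The shape of the replacement, as a stand-alone sketch (user-space C
with illustrative names, not the i915 types): the engine keeps only a
lock and a plain list of in-flight requests, and submission adds each
request to that list under the lock (roughly engine->active.lock and
engine->active.requests in the patch below).

#include <pthread.h>
#include <stdio.h>

struct list_node {
	struct list_node *prev, *next;
};

struct engine_active {
	pthread_mutex_t lock;		/* cf. engine->active.lock */
	struct list_node requests;	/* cf. engine->active.requests */
};

struct request {
	struct list_node link;		/* cf. rq->sched.link */
	unsigned int seqno;
};

static void list_init(struct list_node *head)
{
	head->prev = head->next = head;
}

static void list_add_tail(struct list_node *node, struct list_node *head)
{
	node->prev = head->prev;
	node->next = head;
	head->prev->next = node;
	head->prev = node;
}

/* On submission the request joins the engine's in-flight list. */
static void engine_add_request(struct engine_active *active,
			       struct request *rq)
{
	pthread_mutex_lock(&active->lock);
	list_add_tail(&rq->link, &active->requests);
	pthread_mutex_unlock(&active->lock);
}

int main(void)
{
	struct engine_active active = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct request rq = { .seqno = 1 };

	list_init(&active.requests);
	engine_add_request(&active, &rq);
	printf("request %u in flight\n", rq.seqno);
	return 0;
}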

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  6 ++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 62 ++++++------
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  6 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 95 ++++++++++---------
 drivers/gpu/drm/i915/gt/intel_reset.c         | 10 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 15 ++-
 drivers/gpu/drm/i915/gt/mock_engine.c         | 18 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c         |  5 +-
 drivers/gpu/drm/i915/i915_request.c           | 43 +++------
 drivers/gpu/drm/i915/i915_request.h           |  2 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 38 ++++----
 drivers/gpu/drm/i915/i915_timeline.c          |  1 -
 drivers/gpu/drm/i915/i915_timeline.h          | 19 ----
 drivers/gpu/drm/i915/i915_timeline_types.h    |  4 -
 drivers/gpu/drm/i915/intel_guc_submission.c   | 16 ++--
 .../gpu/drm/i915/selftests/mock_timeline.c    |  1 -
 16 files changed, 153 insertions(+), 188 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b9fd88f21609..6be607e9c084 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -564,4 +564,10 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 
 #endif
 
+void intel_engine_init_active(struct intel_engine_cs *engine,
+			      unsigned int subclass);
+#define ENGINE_PHYSICAL	0
+#define ENGINE_MOCK	1
+#define ENGINE_VIRTUAL	2
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5a08036ae774..01f50cfd517c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -617,14 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	if (err)
 		return err;
 
-	err = i915_timeline_init(engine->i915,
-				 &engine->timeline,
-				 engine->status_page.vma);
-	if (err)
-		goto err_hwsp;
-
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
+	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init_hangcheck(engine);
@@ -637,10 +630,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
 
 	return 0;
-
-err_hwsp:
-	cleanup_status_page(engine);
-	return err;
 }
 
 /**
@@ -797,6 +786,27 @@ static int pin_context(struct i915_gem_context *ctx,
 	return 0;
 }
 
+void
+intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
+{
+	INIT_LIST_HEAD(&engine->active.requests);
+
+	spin_lock_init(&engine->active.lock);
+	lockdep_set_subclass(&engine->active.lock, subclass);
+
+	/*
+	 * Due to an interesting quirk in lockdep's internal debug tracking,
+	 * after setting a subclass we must ensure the lock is used. Otherwise,
+	 * nr_unused_locks is incremented once too often.
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	local_irq_disable();
+	lock_map_acquire(&engine->active.lock.dep_map);
+	lock_map_release(&engine->active.lock.dep_map);
+	local_irq_enable();
+#endif
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -860,6 +870,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(!list_empty(&engine->active.requests));
+
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
@@ -874,8 +886,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
-	i915_timeline_fini(&engine->timeline);
-
 	intel_wa_list_free(&engine->ctx_wa_list);
 	intel_wa_list_free(&engine->wa_list);
 	intel_wa_list_free(&engine->whitelist);
@@ -1482,16 +1492,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
-	rq = list_first_entry(&engine->timeline.requests,
-			      struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tfirst  ");
-
-	rq = list_last_entry(&engine->timeline.requests,
-			     struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tlast   ");
-
 	rq = intel_engine_find_active_request(engine);
 	if (rq) {
 		print_request(m, rq, "\t\tactive ");
@@ -1572,7 +1572,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	write_seqlock(&engine->stats.lock);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1598,7 +1598,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 unlock:
 	write_sequnlock(&engine->stats.lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return err;
 }
@@ -1683,22 +1683,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * At all other times, we must assume the GPU is still running, but
 	 * we only care about the snapshot of this moment.
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (i915_request_completed(request))
 			continue;
 
 		if (!i915_request_started(request))
-			break;
+			continue;
 
 		/* More than one preemptible request may match! */
 		if (!match_ring(request))
-			break;
+			continue;
 
 		active = request;
 		break;
 	}
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return active;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 33a31aa2d2ae..b2faca8e5dec 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -288,7 +288,11 @@ struct intel_engine_cs {
 
 	struct intel_ring *buffer;
 
-	struct i915_timeline timeline;
+	struct {
+		spinlock_t lock;
+		struct list_head requests;
+	} active;
+
 	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 05524489615c..853376895505 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -298,8 +298,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * Check against the first request in ELSP[1], it will, thanks to the
 	 * power of PI, be the highest priority of that context.
 	 */
-	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
-	    rq_prio(list_next_entry(rq, link)) > last_prio)
+	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
+	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 		return true;
 
 	if (rb) {
@@ -434,11 +434,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	struct list_head *uninitialized_var(pl);
 	int prio = I915_PRIORITY_INVALID;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline.requests,
-					 link) {
+					 &engine->active.requests,
+					 sched.link) {
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
@@ -465,7 +465,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			}
 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-			list_add(&rq->sched.link, pl);
+			list_move(&rq->sched.link, pl);
 			active = rq;
 		} else {
 			rq->engine = owner;
@@ -933,11 +933,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 		struct i915_request *rq;
 
-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
 
 		rq = ve->request;
 		if (unlikely(!rq)) { /* lost the race to a sibling */
-			spin_unlock(&ve->base.timeline.lock);
+			spin_unlock(&ve->base.active.lock);
 			rb_erase_cached(rb, &execlists->virtual);
 			RB_CLEAR_NODE(rb);
 			rb = rb_first_cached(&execlists->virtual);
@@ -950,13 +950,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 		if (rq_prio(rq) >= queue_prio(execlists)) {
 			if (!virtual_matches(ve, rq, engine)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
 				rb = rb_next(rb);
 				continue;
 			}
 
 			if (last && !can_merge_rq(last, rq)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
 				return; /* leave this rq for another engine */
 			}
 
@@ -1011,7 +1011,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last = rq;
 		}
 
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
 		break;
 	}
 
@@ -1068,8 +1068,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
 
@@ -1170,7 +1168,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1330,7 +1328,7 @@ static void process_csb(struct intel_engine_cs *engine)
 
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	process_csb(engine);
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
@@ -1351,15 +1349,16 @@ static void execlists_submission_tasklet(unsigned long data)
 		  !!intel_wakeref_active(&engine->wakeref),
 		  engine->execlists.active);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
+	GEM_BUG_ON(!list_empty(&node->link));
 	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
 }
 
@@ -1390,7 +1389,7 @@ static void execlists_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
 
@@ -1399,7 +1398,7 @@ static void execlists_submit_request(struct i915_request *request)
 
 	submit_queue(engine, rq_prio(request));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void __execlists_context_fini(struct intel_context *ce)
@@ -2050,8 +2049,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	intel_engine_stop_cs(engine);
 
 	/* And flush any current direct submission. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static bool lrc_regs_ok(const struct i915_request *rq)
@@ -2094,11 +2093,11 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
 
 static struct i915_request *active_request(struct i915_request *rq)
 {
-	const struct list_head * const list = &rq->engine->timeline.requests;
+	const struct list_head * const list = &rq->engine->active.requests;
 	const struct intel_context * const context = rq->hw_context;
 	struct i915_request *active = NULL;
 
-	list_for_each_entry_from_reverse(rq, list, link) {
+	list_for_each_entry_from_reverse(rq, list, sched.link) {
 		if (i915_request_completed(rq))
 			break;
 
@@ -2215,11 +2214,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 
 	GEM_TRACE("%s\n", engine->name);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__execlists_reset(engine, stalled);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void nop_submission_tasklet(unsigned long data)
@@ -2250,12 +2249,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__execlists_reset(engine, true);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -2286,7 +2285,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		rb_erase_cached(rb, &execlists->virtual);
 		RB_CLEAR_NODE(rb);
 
-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
 		if (ve->request) {
 			ve->request->engine = engine;
 			__i915_request_submit(ve->request);
@@ -2295,7 +2294,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 			ve->base.execlists.queue_priority_hint = INT_MIN;
 			ve->request = NULL;
 		}
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
 	}
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
@@ -2307,7 +2306,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -3010,12 +3009,18 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	return ret;
 }
 
+static struct list_head *virtual_queue(struct virtual_engine *ve)
+{
+	return &ve->base.execlists.default_priolist.requests[0];
+}
+
 static void virtual_context_destroy(struct kref *kref)
 {
 	struct virtual_engine *ve =
 		container_of(kref, typeof(*ve), context.ref);
 	unsigned int n;
 
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 	GEM_BUG_ON(ve->request);
 	GEM_BUG_ON(ve->context.inflight);
 
@@ -3026,13 +3031,13 @@ static void virtual_context_destroy(struct kref *kref)
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->timeline.lock);
+		spin_lock_irq(&sibling->active.lock);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->timeline.lock);
+		spin_unlock_irq(&sibling->active.lock);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
@@ -3040,8 +3045,6 @@ static void virtual_context_destroy(struct kref *kref)
 		__execlists_context_fini(&ve->context);
 
 	kfree(ve->bonds);
-
-	i915_timeline_fini(&ve->base.timeline);
 	kfree(ve);
 }
 
@@ -3161,16 +3164,16 @@ static void virtual_submission_tasklet(unsigned long data)
 
 		if (unlikely(!(mask & sibling->mask))) {
 			if (!RB_EMPTY_NODE(&node->rb)) {
-				spin_lock(&sibling->timeline.lock);
+				spin_lock(&sibling->active.lock);
 				rb_erase_cached(&node->rb,
 						&sibling->execlists.virtual);
 				RB_CLEAR_NODE(&node->rb);
-				spin_unlock(&sibling->timeline.lock);
+				spin_unlock(&sibling->active.lock);
 			}
 			continue;
 		}
 
-		spin_lock(&sibling->timeline.lock);
+		spin_lock(&sibling->active.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
 			/*
@@ -3214,7 +3217,7 @@ static void virtual_submission_tasklet(unsigned long data)
 			tasklet_hi_schedule(&sibling->execlists.tasklet);
 		}
 
-		spin_unlock(&sibling->timeline.lock);
+		spin_unlock(&sibling->active.lock);
 	}
 	local_irq_enable();
 }
@@ -3231,9 +3234,13 @@ static void virtual_submit_request(struct i915_request *rq)
 	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
 
 	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+
 	ve->base.execlists.queue_priority_hint = rq_prio(rq);
 	WRITE_ONCE(ve->request, rq);
 
+	list_move_tail(&rq->sched.link, virtual_queue(ve));
+
 	tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
@@ -3297,10 +3304,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
 
-	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
-	if (err)
-		goto err_put;
-	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
 
 	intel_engine_init_execlists(&ve->base);
 
@@ -3311,6 +3315,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 	ve->base.submit_request = virtual_submit_request;
 	ve->base.bond_execute = virtual_bond_execute;
 
+	INIT_LIST_HEAD(virtual_queue(ve));
 	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
 		     virtual_submission_tasklet,
@@ -3465,11 +3470,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	unsigned int count;
 	struct rb_node *rb;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	last = NULL;
 	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (count++ < max - 1)
 			show_request(m, rq, "\t\tE ");
 		else
@@ -3532,7 +3537,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tV ");
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void intel_lr_context_reset(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 60d24110af80..cf258ec38ba6 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -49,12 +49,12 @@ static void engine_skip_context(struct i915_request *rq)
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *hung_ctx = rq->gem_context;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (!i915_request_is_active(rq))
 		return;
 
-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
 		if (rq->gem_context == hung_ctx)
 			i915_request_skip(rq, -EIO);
 }
@@ -130,7 +130,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
 		  rq->fence.seqno,
 		  yesno(guilty));
 
-	lockdep_assert_held(&rq->engine->timeline.lock);
+	lockdep_assert_held(&rq->engine->active.lock);
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	if (guilty) {
@@ -785,10 +785,10 @@ static void nop_submit_request(struct i915_request *request)
 		  engine->name, request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__i915_request_submit(request);
 	i915_request_mark_complete(request);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	intel_engine_queue_breadcrumbs(engine);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 7ab28b6f62a1..669aa036242d 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -730,14 +730,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
 
 static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	struct i915_timeline *tl = &engine->timeline;
 	struct i915_request *pos, *rq;
 	unsigned long flags;
 	u32 head;
 
 	rq = NULL;
-	spin_lock_irqsave(&tl->lock, flags);
-	list_for_each_entry(pos, &tl->requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(pos, &engine->active.requests, sched.link) {
 		if (!i915_request_completed(pos)) {
 			rq = pos;
 			break;
@@ -791,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 	}
 	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
 
-	spin_unlock_irqrestore(&tl->lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
@@ -877,10 +876,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
@@ -889,7 +888,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void i9xx_submit_request(struct i915_request *request)
@@ -1267,8 +1266,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 
 	GEM_BUG_ON(!is_power_of_2(size));
 	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-	GEM_BUG_ON(timeline == &engine->timeline);
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (!ring)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index b7675ef18523..00c666d3e652 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -229,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
 		i915_request_mark_complete(request);
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -285,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	int err;
 
+	intel_engine_init_active(engine, ENGINE_MOCK);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
 
-	if (i915_timeline_init(i915, &engine->timeline, NULL))
-		goto err_breadcrumbs;
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
 	engine->kernel_context =
 		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
-		goto err_timeline;
+		goto err_breadcrumbs;
 
 	err = intel_context_pin(engine->kernel_context);
 	intel_context_put(engine->kernel_context);
 	if (err)
-		goto err_timeline;
+		goto err_breadcrumbs;
 
 	return 0;
 
-err_timeline:
-	i915_timeline_fini(&engine->timeline);
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 	return -ENOMEM;
@@ -340,7 +335,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
-	i915_timeline_fini(&engine->timeline);
 
 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index dc026d5cd7a0..4cbee4c206bd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1275,7 +1275,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link)
+	list_for_each_entry_from(request, &engine->active.requests, sched.link)
 		count++;
 	if (!count)
 		return;
@@ -1288,7 +1288,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link) {
+	list_for_each_entry_from(request,
+				 &engine->active.requests, sched.link) {
 		if (count >= ee->num_requests) {
 			/*
 			 * If the ring request list was changed in
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9c58ae6e4afb..6b0a4d9343a6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq)
 
 	local_irq_disable();
 
-	spin_lock(&rq->engine->timeline.lock);
-	list_del(&rq->link);
-	spin_unlock(&rq->engine->timeline.lock);
+	spin_lock(&rq->engine->active.lock);
+	list_del(&rq->sched.link);
+	spin_unlock(&rq->engine->active.lock);
 
 	spin_lock(&rq->lock);
 	i915_request_mark_complete(rq);
@@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
 	intel_context_unpin(rq->hw_context);
 
 	i915_request_remove_from_client(rq);
+	list_del(&rq->link);
 
 	free_capture_list(rq);
 	i915_sched_node_fini(&rq->sched);
@@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-			     struct i915_timeline *timeline)
-{
-	GEM_BUG_ON(request->timeline == &request->engine->timeline);
-	lockdep_assert_held(&request->engine->timeline.lock);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
@@ -422,6 +412,8 @@ void __i915_request_submit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
+	list_move_tail(&request->sched.link, &engine->active.requests);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
@@ -437,9 +429,6 @@ void __i915_request_submit(struct i915_request *request)
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	move_to_timeline(request, &engine->timeline);
-
 	engine->serial++;
 
 	trace_i915_request_execute(request);
@@ -451,11 +440,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -468,7 +457,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
@@ -486,9 +475,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	/* Transfer back from the global per-engine timeline to per-context */
-	move_to_timeline(request, request->timeline);
-
 	/* We've already spun, don't charge on resubmitting. */
 	if (request->sched.semaphores && i915_request_started(request)) {
 		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
@@ -510,11 +496,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -669,7 +655,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -1137,9 +1122,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 							 0);
 	}
 
-	spin_lock_irq(&timeline->lock);
 	list_add_tail(&rq->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
 
 	/*
 	 * Make sure that no request gazumped us - if it was allocated after
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c9f7d07991c8..edbbdfec24ab 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -217,7 +217,7 @@ struct i915_request {
 
 	bool waitboost;
 
-	/** engine->request_list entry for this request */
+	/** timeline->request entry for this request */
 	struct list_head link;
 
 	/** ring->request_list entry for this request */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 78ceb56d7801..2e9b38bdc33c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -77,7 +77,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	bool first = true;
 	int idx, i;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 	assert_priolists(execlists);
 
 	/* buckets sorted from highest [in slot 0] to lowest priority */
@@ -162,9 +162,9 @@ sched_lock_engine(const struct i915_sched_node *node,
 	 * check that the rq still belongs to the newly locked engine.
 	 */
 	while (locked != (engine = READ_ONCE(rq->engine))) {
-		spin_unlock(&locked->timeline.lock);
+		spin_unlock(&locked->active.lock);
 		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->timeline.lock);
+		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
 
@@ -189,7 +189,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
 	 * tasklet, i.e. we have not change the priority queue
 	 * sufficiently to oust the running context.
 	 */
-	if (inflight && !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
+	if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
 		return;
 
 	tasklet_hi_schedule(&engine->execlists.tasklet);
@@ -278,7 +278,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 
 	memset(&cache, 0, sizeof(cache));
 	engine = node_to_request(node)->engine;
-	spin_lock(&engine->timeline.lock);
+	spin_lock(&engine->active.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	engine = sched_lock_engine(node, engine, &cache);
@@ -287,7 +287,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 
 		node = dep->signaler;
 		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->timeline.lock);
+		lockdep_assert_held(&engine->active.lock);
 
 		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority || node_signaled(node))
@@ -296,14 +296,8 @@ static void __i915_schedule(struct i915_sched_node *node,
 		GEM_BUG_ON(node_to_request(node)->engine != engine);
 
 		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			GEM_BUG_ON(intel_engine_is_virtual(engine));
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		} else {
+
+		if (list_empty(&node->link)) {
 			/*
 			 * If the request is not in the priolist queue because
 			 * it is not yet runnable, then it doesn't contribute
@@ -312,8 +306,16 @@ static void __i915_schedule(struct i915_sched_node *node,
 			 * queue; but in that case we may still need to reorder
 			 * the inflight requests.
 			 */
-			if (!i915_sw_fence_done(&node_to_request(node)->submit))
-				continue;
+			continue;
+		}
+
+		if (!intel_engine_is_virtual(engine) &&
+		    !i915_request_is_active(node_to_request(node))) {
+			if (!cache.priolist)
+				cache.priolist =
+					i915_sched_lookup_priolist(engine,
+								   prio);
+			list_move_tail(&node->link, cache.priolist);
 		}
 
 		if (prio <= engine->execlists.queue_priority_hint)
@@ -325,7 +327,7 @@ static void __i915_schedule(struct i915_sched_node *node,
 		kick_submission(engine, prio);
 	}
 
-	spin_unlock(&engine->timeline.lock);
+	spin_unlock(&engine->active.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
@@ -439,8 +441,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 {
 	struct i915_dependency *dep, *tmp;
 
-	GEM_BUG_ON(!list_empty(&node->link));
-
 	spin_lock_irq(&schedule_lock);
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 000e1a9b6750..c311ce9c6f9d 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -251,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->fence_context = dma_fence_context_alloc(1);
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 27668a1a69a3..36e5e5a65155 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_vma *hwsp);
 void i915_timeline_fini(struct i915_timeline *tl);
 
-static inline void
-i915_timeline_set_subclass(struct i915_timeline *timeline,
-			   unsigned int subclass)
-{
-	lockdep_set_subclass(&timeline->lock, subclass);
-
-	/*
-	 * Due to an interesting quirk in lockdep's internal debug tracking,
-	 * after setting a subclass we must ensure the lock is used. Otherwise,
-	 * nr_unused_locks is incremented once too often.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	local_irq_disable();
-	lock_map_acquire(&timeline->lock.dep_map);
-	lock_map_release(&timeline->lock.dep_map);
-	local_irq_enable();
-#endif
-}
-
 struct i915_timeline *
 i915_timeline_create(struct drm_i915_private *i915,
 		     struct i915_vma *global_hwsp);
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 1688705f4a2b..fce5cb4f1090 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -23,10 +23,6 @@ struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
 
-	spinlock_t lock;
-#define TIMELINE_CLIENT 0 /* default subclass */
-#define TIMELINE_ENGINE 1
-#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 89592ef778b8..928121f06054 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -740,7 +740,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (port_isset(port)) {
 		if (intel_engine_has_preemption(engine)) {
@@ -822,7 +822,7 @@ static void guc_submission_tasklet(unsigned long data)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -847,7 +847,7 @@ static void guc_submission_tasklet(unsigned long data)
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_prepare(struct intel_engine_cs *engine)
@@ -884,7 +884,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	execlists_cancel_port_requests(execlists);
 
@@ -900,7 +900,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
 
 out_unlock:
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_cancel_requests(struct intel_engine_cs *engine)
@@ -926,13 +926,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -961,7 +961,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	execlists->queue = RB_ROOT_CACHED;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_finish(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index e084476469ef..65b52be23d42 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->i915 = NULL;
 	timeline->fence_context = context;
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 4/8] drm/i915: Flush the execution-callbacks on retiring
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (2 preceding siblings ...)
  2019-06-12  9:31 ` [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-12  9:31 ` [PATCH 5/8] drm/i915/execlists: Preempt-to-busy Chris Wilson
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

In the unlikely case that the request completes while we do not even
regard it as executing on the GPU (see the next patch!), we have to flush
any pending execution callbacks at retirement and ensure that we do not
add any more.
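
As an aside for readers skimming the diff, here is a minimal userspace C
sketch of the invariant being enforced; every name in it (request,
execute_cb, add_execute_cb, retire_request) is invented for illustration
and is not the i915 API:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct execute_cb {
	void (*fn)(void *data);
	void *data;
	struct execute_cb *next;
};

struct request {
	bool active;		/* has the "execute" point been signalled? */
	struct execute_cb *cbs;	/* pending callbacks, LIFO for simplicity */
};

static void notify_execute_cbs(struct request *rq)
{
	struct execute_cb *cb = rq->cbs;

	rq->cbs = NULL;
	while (cb) {
		struct execute_cb *next = cb->next;

		cb->fn(cb->data);
		free(cb);
		cb = next;
	}
}

/* Queue a callback, or run it at once if the request already executed. */
static void add_execute_cb(struct request *rq, void (*fn)(void *), void *data)
{
	struct execute_cb *cb;

	if (rq->active) {
		fn(data);
		return;
	}

	cb = malloc(sizeof(*cb));
	cb->fn = fn;
	cb->data = data;
	cb->next = rq->cbs;
	rq->cbs = cb;
}

/*
 * Retirement must flush stragglers: the request may complete without ever
 * having been marked active, so drain the list and flip the flag here too.
 */
static void retire_request(struct request *rq)
{
	if (!rq->active) {
		rq->active = true;
		notify_execute_cbs(rq);
	}
}

static void say(void *data) { printf("%s\n", (const char *)data); }

int main(void)
{
	struct request rq = { .active = false, .cbs = NULL };

	add_execute_cb(&rq, say, "queued before execution");
	retire_request(&rq);	/* flushes the pending callback */
	add_execute_cb(&rq, say, "added after retire, runs immediately");
	return 0;
}

The point mirrored from the patch is that retirement, not just the submit
path, is responsible for draining the execute_cb list, since the request
may complete without ever having been marked active.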

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 93 +++++++++++++++--------------
 1 file changed, 49 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6b0a4d9343a6..6b27892ebba7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -119,6 +119,50 @@ const struct dma_fence_ops i915_fence_ops = {
 	.release = i915_fence_release,
 };
 
+static void irq_execute_cb(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	i915_sw_fence_complete(cb->fence);
+	kmem_cache_free(global.slab_execute_cbs, cb);
+}
+
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	cb->hook(container_of(cb->fence, struct i915_request, submit),
+		 &cb->signal->fence);
+	i915_request_put(cb->signal);
+
+	irq_execute_cb(wrk);
+}
+
+static void __notify_execute_cb(struct i915_request *rq)
+{
+	struct execute_cb *cb;
+
+	lockdep_assert_held(&rq->lock);
+
+	if (list_empty(&rq->execute_cb))
+		return;
+
+	list_for_each_entry(cb, &rq->execute_cb, link)
+		irq_work_queue(&cb->work);
+
+	/*
+	 * XXX Rollback on __i915_request_unsubmit()
+	 *
+	 * In the future, perhaps when we have an active time-slicing scheduler,
+	 * it will be interesting to unsubmit parallel execution and remove
+	 * busywaits from the GPU until their master is restarted. This is
+	 * quite hairy, we have to carefully rollback the fence and do a
+	 * preempt-to-idle cycle on the target engine, all the while the
+	 * master execute_cb may refire.
+	 */
+	INIT_LIST_HEAD(&rq->execute_cb);
+}
+
 static inline void
 i915_request_remove_from_client(struct i915_request *request)
 {
@@ -246,6 +290,11 @@ static bool i915_request_retire(struct i915_request *rq)
 		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
 		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
 	}
+	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+		__notify_execute_cb(rq);
+	}
+	GEM_BUG_ON(!list_empty(&rq->execute_cb));
 	spin_unlock(&rq->lock);
 
 	local_irq_enable();
@@ -285,50 +334,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static void irq_execute_cb(struct irq_work *wrk)
-{
-	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
-
-	i915_sw_fence_complete(cb->fence);
-	kmem_cache_free(global.slab_execute_cbs, cb);
-}
-
-static void irq_execute_cb_hook(struct irq_work *wrk)
-{
-	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
-
-	cb->hook(container_of(cb->fence, struct i915_request, submit),
-		 &cb->signal->fence);
-	i915_request_put(cb->signal);
-
-	irq_execute_cb(wrk);
-}
-
-static void __notify_execute_cb(struct i915_request *rq)
-{
-	struct execute_cb *cb;
-
-	lockdep_assert_held(&rq->lock);
-
-	if (list_empty(&rq->execute_cb))
-		return;
-
-	list_for_each_entry(cb, &rq->execute_cb, link)
-		irq_work_queue(&cb->work);
-
-	/*
-	 * XXX Rollback on __i915_request_unsubmit()
-	 *
-	 * In the future, perhaps when we have an active time-slicing scheduler,
-	 * it will be interesting to unsubmit parallel execution and remove
-	 * busywaits from the GPU until their master is restarted. This is
-	 * quite hairy, we have to carefully rollback the fence and do a
-	 * preempt-to-idle cycle on the target engine, all the while the
-	 * master execute_cb may refire.
-	 */
-	INIT_LIST_HEAD(&rq->execute_cb);
-}
-
 static int
 __i915_request_await_execution(struct i915_request *rq,
 			       struct i915_request *signal,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 5/8] drm/i915/execlists: Preempt-to-busy
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (3 preceding siblings ...)
  2019-06-12  9:31 ` [PATCH 4/8] drm/i915: Flush the execution-callbacks on retiring Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-12  9:31 ` [PATCH 6/8] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

When using a global seqno, we required a precise stop-the-world event to
handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for the
machine to report that it was idle. Given an idle machine, we could very
precisely see which requests had completed and which we needed to feed
back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter; instead we track requests only
by their per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much more
messy, as at any point we may then see a completed request that we
believe is not currently scheduled for execution.
careful not to rewind RING_TAIL past RING_HEAD on preempting to the
running context, and for this we use a semaphore to prevent completion
of the request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As normal
CS completion events arrive, we then remove stale entries from the
inflight tracker.
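
For illustration only, a standalone C sketch of the pending/inflight
bookkeeping described above; the types and helpers here (execlists,
submit, promote, process_completions) are simplified inventions that
mirror the idea of two NULL-terminated port arrays plus an 'active'
cursor, not the driver's actual structures:

#include <stdio.h>
#include <string.h>

#define MAX_PORTS 2

struct request {
	int seqno;
	int completed;
};

struct execlists {
	/* +1 for the NULL sentinel terminating each array */
	struct request *inflight[MAX_PORTS + 1];
	struct request *pending[MAX_PORTS + 1];
	struct request **active;	/* first non-retired inflight entry */
};

/* Queue a new submission: requests written to ELSP but not yet acked. */
static void submit(struct execlists *el, struct request **rqs, int count)
{
	memset(el->pending, 0, sizeof(el->pending));
	memcpy(el->pending, rqs, count * sizeof(*rqs));
}

/* CS ack: the HW switched to the pending set, so promote it to inflight. */
static void promote(struct execlists *el)
{
	el->active = memcpy(el->inflight, el->pending, sizeof(el->pending));
	el->pending[0] = NULL;
}

/* CS completion events: step the cursor past entries that have completed. */
static void process_completions(struct execlists *el)
{
	while (*el->active && (*el->active)->completed)
		el->active++;
}

int main(void)
{
	struct request a = { .seqno = 1 }, b = { .seqno = 2 };
	struct request *batch[] = { &a, &b };
	struct execlists el = { 0 };

	el.active = el.inflight;

	submit(&el, batch, 2);
	promote(&el);			/* pending -> inflight on the ack */

	a.completed = 1;
	process_completions(&el);	/* steps past the stale, completed entry */
	printf("active seqno: %d\n", (*el.active)->seqno);	/* prints 2 */

	return 0;
}

The NULL sentinel plays the same role as in the patch: iteration over a
port array stops at the first empty slot, so no separate count has to be
maintained alongside the array.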

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +
 drivers/gpu/drm/i915/gt/intel_engine.h        |  61 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  61 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  52 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 671 ++++++++----------
 drivers/gpu/drm/i915/i915_gpu_error.c         |  19 +-
 drivers/gpu/drm/i915/i915_request.c           |   6 +
 drivers/gpu/drm/i915/i915_request.h           |   1 +
 drivers/gpu/drm/i915/i915_scheduler.c         |   3 +-
 drivers/gpu/drm/i915/i915_utils.h             |  12 +
 drivers/gpu/drm/i915/intel_guc_submission.c   | 175 ++---
 drivers/gpu/drm/i915/selftests/i915_request.c |   8 +-
 13 files changed, 465 insertions(+), 611 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 6200060aef05..1ce122f4ed25 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -646,7 +646,7 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_EXECLISTS(i915);
+	return USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e95be4be9612..b565c3ff4378 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
 
@@ -38,6 +39,10 @@ struct intel_context {
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
+#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
+#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
+#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
 
 	struct list_head signal_link;
 	struct list_head signals;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 6be607e9c084..b798fbdd03b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -124,71 +124,26 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
-		     unsigned int bit)
-{
-	__set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
-			  unsigned int bit)
-{
-	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
-		       unsigned int bit)
-{
-	__clear_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
-	execlists->active = 0;
+	return execlists->port_mask + 1;
 }
 
-static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
-		    unsigned int bit)
+static inline struct i915_request *
+execlists_active(const struct intel_engine_execlists *execlists)
 {
-	return test_bit(bit, (unsigned long *)&execlists->active);
+	GEM_BUG_ON(execlists->active - execlists->inflight >
+		   execlists_num_ports(execlists));
+	return READ_ONCE(*execlists->active);
 }
 
-void execlists_user_begin(struct intel_engine_execlists *execlists,
-			  const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
-
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
 
 struct i915_request *
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 
-static inline unsigned int
-execlists_num_ports(const struct intel_engine_execlists * const execlists)
-{
-	return execlists->port_mask + 1;
-}
-
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-			struct execlist_port * const port)
-{
-	const unsigned int m = execlists->port_mask;
-
-	GEM_BUG_ON(port_index(port, execlists) != 0);
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
-	memmove(port, port + 1, m * sizeof(struct execlist_port));
-	memset(port + m, 0, sizeof(struct execlist_port));
-
-	return port;
-}
-
 static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 01f50cfd517c..01357280449a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -508,6 +508,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+	memset(execlists->pending, 0, sizeof(execlists->pending));
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
 }
@@ -1152,7 +1156,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return true;
 
 	/* Waiting to drain ELSP? */
-	if (READ_ONCE(engine->execlists.active)) {
+	if (execlists_active(&engine->execlists)) {
 		struct tasklet_struct *t = &engine->execlists.tasklet;
 
 		synchronize_hardirq(engine->i915->drm.irq);
@@ -1169,7 +1173,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		/* Otherwise flush the tasklet if it was on another cpu */
 		tasklet_unlock_wait(t);
 
-		if (READ_ONCE(engine->execlists.active))
+		if (execlists_active(&engine->execlists))
 			return false;
 	}
 
@@ -1366,6 +1370,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 
 	if (HAS_EXECLISTS(dev_priv)) {
+		struct i915_request * const *port, *rq;
 		const u32 *hws =
 			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 		const u8 num_entries = execlists->csb_size;
@@ -1398,26 +1403,28 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		}
 
 		rcu_read_lock();
-		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
-			struct i915_request *rq;
-			unsigned int count;
-
-			rq = port_unpack(&execlists->port[idx], &count);
-			if (rq) {
-				char hdr[80];
-
-				snprintf(hdr, sizeof(hdr),
-					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
-					 idx, count,
-					 i915_ggtt_offset(rq->ring->vma),
-					 rq->timeline->hwsp_offset,
-					 hwsp_seqno(rq));
-				print_request(m, rq, hdr);
-			} else {
-				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
-			}
+		for (port = execlists->active; (rq = *port); port++) {
+			char hdr[80];
+
+			snprintf(hdr, sizeof(hdr),
+				 "\t\tActive[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+				 (int)(port - execlists->active),
+				 i915_ggtt_offset(rq->ring->vma),
+				 rq->timeline->hwsp_offset,
+				 hwsp_seqno(rq));
+			print_request(m, rq, hdr);
+		}
+		for (port = execlists->pending; (rq = *port); port++) {
+			char hdr[80];
+
+			snprintf(hdr, sizeof(hdr),
+				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+				 (int)(port - execlists->pending),
+				 i915_ggtt_offset(rq->ring->vma),
+				 rq->timeline->hwsp_offset,
+				 hwsp_seqno(rq));
+			print_request(m, rq, hdr);
 		}
-		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
 		rcu_read_unlock();
 	} else if (INTEL_GEN(dev_priv) > 6) {
 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1581,15 +1588,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	}
 
 	if (engine->stats.enabled++ == 0) {
-		const struct execlist_port *port = execlists->port;
-		unsigned int num_ports = execlists_num_ports(execlists);
+		struct i915_request * const *port;
+		struct i915_request *rq;
 
 		engine->stats.enabled_at = ktime_get();
 
 		/* XXX submission method oblivious? */
-		while (num_ports-- && port_isset(port)) {
+		for (port = execlists->active; (rq = *port); port++)
 			engine->stats.active++;
-			port++;
+
+		for (port = execlists->pending; (rq = *port); port++) {
+			/* Exclude any contexts already counted in active */
+			if (intel_context_inflight_count(rq->hw_context) == 1)
+				engine->stats.active++;
 		}
 
 		if (engine->stats.active)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index b2faca8e5dec..dd0082df42cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -160,51 +160,10 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
-	/**
-	 * @port: execlist port states
-	 *
-	 * For each hardware ELSP (ExecList Submission Port) we keep
-	 * track of the last request and the number of times we submitted
-	 * that port to hw. We then count the number of times the hw reports
-	 * a context completion or preemption. As only one context can
-	 * be active on hw, we limit resubmission of context to port[0]. This
-	 * is called Lite Restore, of the context.
-	 */
-	struct execlist_port {
-		/**
-		 * @request_count: combined request and submission count
-		 */
-		struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
-		/**
-		 * @context_id: context ID for port
-		 */
-		GEM_DEBUG_DECL(u32 context_id);
-
 #define EXECLIST_MAX_PORTS 2
-	} port[EXECLIST_MAX_PORTS];
-
-	/**
-	 * @active: is the HW active? We consider the HW as active after
-	 * submitting any context for execution and until we have seen the
-	 * last context completion event. After that, we do not expect any
-	 * more events until we submit, and so can park the HW.
-	 *
-	 * As we have a small number of different sources from which we feed
-	 * the HW, we track the state of each inside a single bitfield.
-	 */
-	unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+	struct i915_request * const *active;
+	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
+	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -245,11 +204,6 @@ struct intel_engine_execlists {
 	 */
 	u32 *csb_status;
 
-	/**
-	 * @preempt_complete_status: expected CSB upon completing preemption
-	 */
-	u32 preempt_complete_status;
-
 	/**
 	 * @csb_size: context status buffer FIFO size
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 853376895505..2b12ec9bc0d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -161,6 +161,8 @@
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -221,6 +223,14 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_PREEMPT_ADDR);
+}
+
+#define ring_pause(E) ((E)->status_page.addr[I915_GEM_HWS_PREEMPT])
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -271,12 +281,6 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 {
 	int last_prio;
 
-	if (!engine->preempt_context)
-		return false;
-
-	if (i915_request_completed(rq))
-		return false;
-
 	/*
 	 * Check if the current priority hint merits a preemption attempt.
 	 *
@@ -338,9 +342,6 @@ __maybe_unused static inline bool
 assert_priority_queue(const struct i915_request *prev,
 		      const struct i915_request *next)
 {
-	const struct intel_engine_execlists *execlists =
-		&prev->engine->execlists;
-
 	/*
 	 * Without preemption, the prev may refer to the still active element
 	 * which we refuse to let go.
@@ -348,7 +349,7 @@ assert_priority_queue(const struct i915_request *prev,
 	 * Even with preemption, there are times when we think it is better not
 	 * to preempt and leave an ostensibly lower priority request in flight.
 	 */
-	if (port_request(execlists->port) == prev)
+	if (i915_request_is_active(prev))
 		return true;
 
 	return rq_prio(prev) >= rq_prio(next);
@@ -442,13 +443,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
-			break;
+			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->hw_context->inflight);
-
 		/*
 		 * Push the request back into the queue for later resubmission.
 		 * If this request is not native to this physical engine (i.e.
@@ -500,32 +499,32 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
-		     const struct execlist_port *port)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+	struct intel_context *ce = rq->hw_context;
+	int count;
 
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
-}
+	trace_i915_request_in(rq, idx);
 
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
-{
-	GEM_BUG_ON(rq->hw_context->inflight);
+	count = intel_context_inflight_count(ce);
+	if (!count) {
+		intel_context_get(ce);
+		ce->inflight = rq->engine;
+
+		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+		intel_engine_context_in(ce->inflight);
+	}
+
+	intel_context_inflight_inc(ce);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 
-	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-	intel_engine_context_in(rq->engine);
-	rq->hw_context->inflight = rq->engine;
+	return i915_request_get(rq);
 }
 
-static void kick_siblings(struct i915_request *rq)
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
-	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 	struct i915_request *next = READ_ONCE(ve->request);
 
 	if (next && next->execution_mask & ~rq->execution_mask)
@@ -533,29 +532,42 @@ static void kick_siblings(struct i915_request *rq)
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+execlists_schedule_out(struct i915_request *rq)
 {
-	rq->hw_context->inflight = NULL;
-	intel_engine_context_out(rq->engine);
-	execlists_context_status_change(rq, status);
+	struct intel_context *ce = rq->hw_context;
+
+	GEM_BUG_ON(!intel_context_inflight_count(ce));
+
 	trace_i915_request_out(rq);
 
-	/*
-	 * If this is part of a virtual engine, its next request may have
-	 * been blocked waiting for access to the active context. We have
-	 * to kick all the siblings again in case we need to switch (e.g.
-	 * the next request is not runnable on this engine). Hopefully,
-	 * we will already have submitted the next request before the
-	 * tasklet runs and do not need to rebuild each virtual tree
-	 * and kick everyone again.
-	 */
-	if (rq->engine != rq->hw_context->engine)
-		kick_siblings(rq);
+	intel_context_inflight_dec(ce);
+	if (!intel_context_inflight_count(ce)) {
+		intel_engine_context_out(ce->inflight);
+		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+		ce->inflight = NULL;
+		intel_context_put(ce);
+
+		/*
+		 * If this is part of a virtual engine, its next request may
+		 * have been blocked waiting for access to the active context.
+		 * We have to kick all the siblings again in case we need to
+		 * switch (e.g. the next request is not runnable on this
+		 * engine). Hopefully, we will already have submitted the next
+		 * request before the tasklet runs and do not need to rebuild
+		 * each virtual tree and kick everyone again.
+		 */
+		if (rq->engine != ce->engine)
+			kick_siblings(rq, ce);
+	}
+
+	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(struct i915_request *rq)
+static u64 execlists_update_context(const struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
+	u64 desc;
 
 	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
 		intel_ring_set_tail(rq->ring, rq->tail);
@@ -576,7 +588,11 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * wmb).
 	 */
 	mb();
-	return ce->lrc_desc;
+
+	desc = ce->lrc_desc;
+	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+
+	return desc;
 }
 
 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -590,12 +606,54 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
 	}
 }
 
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+	    const char *msg,
+	    struct i915_request * const *ports)
+{
+	const struct intel_engine_cs *engine =
+		container_of(execlists, typeof(*engine), execlists);
+
+	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+		  engine->name, msg,
+		  ports[0]->fence.context,
+		  ports[0]->fence.seqno,
+		  i915_request_completed(ports[0]) ? "!" :
+		  i915_request_started(ports[0]) ? "*" :
+		  "",
+		  ports[1] ? ports[1]->fence.context : 0,
+		  ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+		     const char *msg)
+{
+	struct i915_request * const *port, *rq;
+	struct intel_context *ce = NULL;
+
+	trace_ports(execlists, msg, execlists->pending);
+
+	if (execlists->pending[execlists_num_ports(execlists)])
+		return false;
+
+	for (port = execlists->pending; (rq = *port); port++) {
+		if (ce == rq->hw_context)
+			return false;
+
+		ce = rq->hw_context;
+	}
+
+	return ce;
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	unsigned int n;
 
+	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
 	/*
 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -613,38 +671,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	 * of elsq entries, keep this in mind before changing the loop below.
 	 */
 	for (n = execlists_num_ports(execlists); n--; ) {
-		struct i915_request *rq;
-		unsigned int count;
-		u64 desc;
+		struct i915_request *rq = execlists->pending[n];
 
-		rq = port_unpack(&port[n], &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_schedule_in(rq);
-			port_set(&port[n], port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-				  engine->name, n,
-				  port[n].context_id, count,
-				  rq->fence.context, rq->fence.seqno,
-				  hwsp_seqno(rq),
-				  rq_prio(rq));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
-
-		write_desc(execlists, desc, n);
+		write_desc(execlists,
+			   rq ? execlists_update_context(rq) : 0,
+			   n);
 	}
 
 	/* we need to manually load the submit queue */
 	if (execlists->ctrl_reg)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -668,6 +704,7 @@ static bool can_merge_ctx(const struct intel_context *prev,
 static bool can_merge_rq(const struct i915_request *prev,
 			 const struct i915_request *next)
 {
+	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -676,58 +713,6 @@ static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
-	GEM_BUG_ON(rq == port_request(port));
-
-	if (port_isset(port))
-		i915_request_put(port_request(port));
-
-	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce = engine->preempt_context;
-	unsigned int n;
-
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
-
-	write_desc(execlists, ce->lrc_desc, n);
-
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
-	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
-	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
-	if (inject_preempt_hang(execlists))
-		return;
-
-	execlists_cancel_port_requests(execlists);
-	__unwind_incomplete_requests(container_of(execlists,
-						  struct intel_engine_cs,
-						  execlists));
-}
-
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
@@ -792,7 +777,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
 	 * we reuse the register offsets). This is a very small
 	 * hystersis on the greedy seelction algorithm.
 	 */
-	inflight = READ_ONCE(ve->context.inflight);
+	inflight = intel_context_inflight(&ve->context);
 	if (inflight && inflight != engine)
 		return false;
 
@@ -815,13 +800,23 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
 	spin_unlock(&old->breadcrumbs.irq_lock);
 }
 
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+	struct i915_request * const *last = execlists->active;
+
+	while (*last && i915_request_completed(*last))
+		last++;
+
+	return *last;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
-	struct i915_request *last = port_request(port);
+	struct i915_request **port = execlists->pending;
+	struct i915_request ** const last_port = port + execlists->port_mask;
+	struct i915_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
@@ -867,65 +862,72 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		break;
 	}
 
+	/*
+	 * If the queue is higher priority than the last
+	 * request in the currently active context, submit afresh.
+	 * We will resubmit again afterwards in case we need to split
+	 * the active context to interject the preemption request,
+	 * i.e. we will retrigger preemption following the ack in case
+	 * of trouble.
+	 */
+	last = last_active(execlists);
 	if (last) {
-		/*
-		 * Don't resubmit or switch until all outstanding
-		 * preemptions (lite-restore) are seen. Then we
-		 * know the next preemption status we see corresponds
-		 * to this ELSP update.
-		 */
-		GEM_BUG_ON(!execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_USER));
-		GEM_BUG_ON(!port_count(&port[0]));
-
-		/*
-		 * If we write to ELSP a second time before the HW has had
-		 * a chance to respond to the previous write, we can confuse
-		 * the HW and hit "undefined behaviour". After writing to ELSP,
-		 * we must then wait until we see a context-switch event from
-		 * the HW to indicate that it has had a chance to respond.
-		 */
-		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return;
-
 		if (need_preempt(engine, last, rb)) {
-			inject_preempt_context(engine);
-			return;
-		}
+			GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
+				  engine->name,
+				  last->fence.context,
+				  last->fence.seqno,
+				  last->sched.attr.priority,
+				  execlists->queue_priority_hint);
+			/*
+			 * Don't let the RING_HEAD advance past the breadcrumb
+			 * as we unwind (and until we resubmit) so that we do
+			 * not accidentally tell it to go backwards.
+			 */
+			ring_pause(engine) = 1;
 
-		/*
-		 * In theory, we could coalesce more requests onto
-		 * the second port (the first port is active, with
-		 * no preemptions pending). However, that means we
-		 * then have to deal with the possible lite-restore
-		 * of the second port (as we submit the ELSP, there
-		 * may be a context-switch) but also we may complete
-		 * the resubmission before the context-switch. Ergo,
-		 * coalescing onto the second port will cause a
-		 * preemption event, but we cannot predict whether
-		 * that will affect port[0] or port[1].
-		 *
-		 * If the second port is already active, we can wait
-		 * until the next context-switch before contemplating
-		 * new requests. The GPU will be busy and we should be
-		 * able to resubmit the new ELSP before it idles,
-		 * avoiding pipeline bubbles (momentary pauses where
-		 * the driver is unable to keep up the supply of new
-		 * work). However, we have to double check that the
-		 * priorities of the ports haven't been switch.
-		 */
-		if (port_count(&port[1]))
-			return;
+			/*
+			 * Note that we have not stopped the GPU at this point,
+			 * so we are unwinding the incomplete requests as they
+			 * remain inflight and so by the time we do complete
+			 * the preemption, some of the unwound requests may
+			 * complete!
+			 */
+			__unwind_incomplete_requests(engine);
 
-		/*
-		 * WaIdleLiteRestore:bdw,skl
-		 * Apply the wa NOOPs to prevent
-		 * ring:HEAD == rq:TAIL as we resubmit the
-		 * request. See gen8_emit_fini_breadcrumb() for
-		 * where we prepare the padding after the
-		 * end of the request.
-		 */
-		last->tail = last->wa_tail;
+			/*
+			 * If we need to return to the preempted context, we
+			 * need to skip the lite-restore and force it to
+			 * reload the RING_TAIL. Otherwise, the HW has a
+			 * tendency to ignore us rewinding the TAIL to the
+			 * end of an earlier request.
+			 */
+			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+			last = NULL;
+		} else {
+			/*
+			 * Otherwise if we already have a request pending
+			 * for execution after the current one, we can
+			 * just wait until the next CS event before
+			 * queuing more. In either case we will force a
+			 * lite-restore preemption event, but if we wait
+			 * we hopefully coalesce several updates into a single
+			 * submission.
+			 */
+			if (!list_is_last(&last->sched.link,
+					  &engine->active.requests))
+				return;
+
+			/*
+			 * WaIdleLiteRestore:bdw,skl
+			 * Apply the wa NOOPs to prevent
+			 * ring:HEAD == rq:TAIL as we resubmit the
+			 * request. See gen8_emit_fini_breadcrumb() for
+			 * where we prepare the padding after the
+			 * end of the request.
+			 */
+			last->tail = last->wa_tail;
+		}
 	}
 
 	while (rb) { /* XXX virtual is always taking precedence */
@@ -955,9 +957,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				continue;
 			}
 
+			if (i915_request_completed(rq)) {
+				ve->request = NULL;
+				ve->base.execlists.queue_priority_hint = INT_MIN;
+				rb_erase_cached(rb, &execlists->virtual);
+				RB_CLEAR_NODE(rb);
+
+				rq->engine = engine;
+				__i915_request_submit(rq);
+
+				spin_unlock(&ve->base.active.lock);
+
+				rb = rb_first_cached(&execlists->virtual);
+				continue;
+			}
+
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
-				return; /* leave this rq for another engine */
+				return; /* leave this for another */
 			}
 
 			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
@@ -1006,9 +1023,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			}
 
 			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-			submit = true;
-			last = rq;
+			if (!i915_request_completed(rq)) {
+				submit = true;
+				last = rq;
+			}
 		}
 
 		spin_unlock(&ve->base.active.lock);
@@ -1021,6 +1039,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
+			if (i915_request_completed(rq))
+				goto skip;
+
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -1060,19 +1081,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				    ctx_single_port_submission(rq->hw_context))
 					goto done;
 
-
-				if (submit)
-					port_assign(port, last);
+				*port = execlists_schedule_in(last, port - execlists->pending);
 				port++;
-
-				GEM_BUG_ON(port_isset(port));
 			}
 
-			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-
 			last = rq;
 			submit = true;
+skip:
+			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -1097,54 +1113,30 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * interrupt for secondary ports).
 	 */
 	execlists->queue_priority_hint = queue_prio(execlists);
+	GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
+		  engine->name, execlists->queue_priority_hint,
+		  yesno(submit));
 
 	if (submit) {
-		port_assign(port, last);
+		*port = execlists_schedule_in(last, port - execlists->pending);
+		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
 	}
-
-	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-		   !port_isset(execlists->port));
-
-	/* Re-evaluate the executing context setup after each preemptive kick */
-	if (last)
-		execlists_user_begin(execlists, execlists->port);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
 }
 
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-	struct execlist_port *port = execlists->port;
-	unsigned int num_ports = execlists_num_ports(execlists);
-
-	while (num_ports-- && port_isset(port)) {
-		struct i915_request *rq = port_request(port);
-
-		GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
-			  rq->engine->name,
-			  (unsigned int)(port - execlists->port),
-			  rq->fence.context, rq->fence.seqno,
-			  hwsp_seqno(rq));
+	struct i915_request * const *port, *rq;
 
-		GEM_BUG_ON(!execlists->active);
-		execlists_context_schedule_out(rq,
-					       i915_request_completed(rq) ?
-					       INTEL_CONTEXT_SCHEDULE_OUT :
-					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+	for (port = execlists->pending; (rq = *port); port++)
+		execlists_schedule_out(rq);
+	memset(execlists->pending, 0, sizeof(execlists->pending));
 
-		i915_request_put(rq);
-
-		memset(port, 0, sizeof(*port));
-		port++;
-	}
-
-	execlists_clear_all_active(execlists);
+	for (port = execlists->active; (rq = *port); port++)
+		execlists_schedule_out(rq);
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
 static inline void
@@ -1163,7 +1155,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	const u32 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
@@ -1197,9 +1188,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
-		struct i915_request *rq;
 		unsigned int status;
-		unsigned int count;
 
 		if (++head == num_entries)
 			head = 0;
@@ -1222,68 +1211,37 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
 			  engine->name, head,
-			  buf[2 * head + 0], buf[2 * head + 1],
-			  execlists->active);
+			  buf[2 * head + 0], buf[2 * head + 1]);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
-
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
+promote:
+			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+			execlists->active =
+				memcpy(execlists->inflight,
+				       execlists->pending,
+				       execlists_num_ports(execlists) *
+				       sizeof(*execlists->pending));
+			execlists->pending[0] = NULL;
 
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
-			GEM_TRACE("%s preempt-idle\n", engine->name);
-			complete_preempt_context(execlists);
-			continue;
-		}
+			if (!inject_preempt_hang(execlists))
+				ring_pause(engine) = 0;
+		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
+			struct i915_request * const *port = execlists->active;
 
-		if (status & GEN8_CTX_STATUS_PREEMPTED &&
-		    execlists_is_active(execlists,
-					EXECLISTS_ACTIVE_PREEMPT))
-			continue;
+			trace_ports(execlists, "preempted", execlists->active);
 
-		GEM_BUG_ON(!execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_USER));
+			while (*port)
+				execlists_schedule_out(*port++);
 
-		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-			  engine->name,
-			  port->context_id, count,
-			  rq ? rq->fence.context : 0,
-			  rq ? rq->fence.seqno : 0,
-			  rq ? hwsp_seqno(rq) : 0,
-			  rq ? rq_prio(rq) : 0);
+			goto promote;
+		} else if (*execlists->active) {
+			struct i915_request *rq = *execlists->active++;
 
-		/* Check the context/desc id for this event matches */
-		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
-		GEM_BUG_ON(count == 0);
-		if (--count == 0) {
-			/*
-			 * On the final event corresponding to the
-			 * submission of this context, we expect either
-			 * an element-switch event or a completion
-			 * event (and on completion, the active-idle
-			 * marker). No more preemptions, lite-restore
-			 * or otherwise.
-			 */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-			GEM_BUG_ON(port_isset(&port[1]) &&
-				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-			GEM_BUG_ON(!port_isset(&port[1]) &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+			trace_ports(execlists, "completed",
+				    execlists->active - 1);
 
 			/*
 			 * We rely on the hardware being strongly
@@ -1292,21 +1250,10 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * user interrupt and CSB is processed.
 			 */
 			GEM_BUG_ON(!i915_request_completed(rq));
+			execlists_schedule_out(rq);
 
-			execlists_context_schedule_out(rq,
-						       INTEL_CONTEXT_SCHEDULE_OUT);
-			i915_request_put(rq);
-
-			GEM_TRACE("%s completed ctx=%d\n",
-				  engine->name, port->context_id);
-
-			port = execlists_port_complete(execlists, port);
-			if (port_isset(port))
-				execlists_user_begin(execlists, port);
-			else
-				execlists_user_end(execlists);
-		} else {
-			port_set(port, port_pack(rq, count));
+			GEM_BUG_ON(execlists->active - execlists->inflight >
+				   execlists_num_ports(execlists));
 		}
 	} while (head != tail);
 
@@ -1331,7 +1278,7 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 	lockdep_assert_held(&engine->active.lock);
 
 	process_csb(engine);
-	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+	if (!engine->execlists.pending[0])
 		execlists_dequeue(engine);
 }
 
@@ -1344,11 +1291,6 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	GEM_TRACE("%s awake?=%d, active=%x\n",
-		  engine->name,
-		  !!intel_wakeref_active(&engine->wakeref),
-		  engine->execlists.active);
-
 	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
 	spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -1375,12 +1317,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 		tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
 {
-	if (prio > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = prio;
-		__submit_queue_imm(engine);
-	}
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) <= execlists->queue_priority_hint)
+		return;
+
+	execlists->queue_priority_hint = rq_prio(rq);
+	__submit_queue_imm(engine);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1396,7 +1342,7 @@ static void execlists_submit_request(struct i915_request *request)
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, request);
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -2053,27 +1999,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
-	const struct intel_ring *ring = rq->ring;
-	const u32 *regs = rq->hw_context->lrc_reg_state;
-
-	/* Quick spot check for the common signs of context corruption */
-
-	if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
-	    (RING_CTL_SIZE(ring->size) | RING_VALID))
-		return false;
-
-	if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
-		return false;
-
-	return true;
-}
-
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+static void reset_csb_pointers(struct intel_engine_cs *engine)
 {
+	struct intel_engine_execlists * const execlists = &engine->execlists;
 	const unsigned int reset_value = execlists->csb_size - 1;
 
+	ring_pause(engine) = 0;
+
 	/*
 	 * After a reset, the HW starts writing into CSB entry [0]. We
 	 * therefore have to set our HEAD pointer back one entry so that
@@ -2120,18 +2052,19 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	process_csb(engine); /* drain preemption events */
 
 	/* Following the reset, we need to reload the CSB read/write pointers */
-	reset_csb_pointers(&engine->execlists);
+	reset_csb_pointers(engine);
 
 	/*
 	 * Save the currently executing context, even if we completed
 	 * its request, it was still running at the time of the
 	 * reset and will have been clobbered.
 	 */
-	if (!port_isset(execlists->port))
-		goto out_clear;
+	rq = execlists_active(execlists);
+	if (!rq)
+		return;
 
-	rq = port_request(execlists->port);
 	ce = rq->hw_context;
+	rq = active_request(rq);
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -2144,9 +2077,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 */
 	execlists_cancel_port_requests(execlists);
 
-	rq = active_request(rq);
-	if (!rq)
+	if (!rq) {
+		ce->ring->head = ce->ring->tail;
 		goto out_replay;
+	}
+
+	ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
 
 	/*
 	 * If this request hasn't started yet, e.g. it is waiting on a
@@ -2160,7 +2096,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * Otherwise, if we have not started yet, the request should replay
 	 * perfectly and we do not need to flag the result as being erroneous.
 	 */
-	if (!i915_request_started(rq) && lrc_regs_ok(rq))
+	if (!i915_request_started(rq))
 		goto out_replay;
 
 	/*
@@ -2175,7 +2111,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * image back to the expected values to skip over the guilty request.
 	 */
 	i915_reset_request(rq, stalled);
-	if (!stalled && lrc_regs_ok(rq))
+	if (!stalled)
 		goto out_replay;
 
 	/*
@@ -2195,17 +2131,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	execlists_init_reg_state(regs, ce, engine, ce->ring);
 
 out_replay:
-	/* Rerun the request; its payload has been neutered (if guilty). */
-	ce->ring->head =
-		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+	GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
+		  engine->name, ce->ring->head, ce->ring->tail);
 	intel_ring_update_space(ce->ring);
 	__execlists_update_reg_state(ce, engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
 	__unwind_incomplete_requests(engine);
-
-out_clear:
-	execlists_clear_all_active(execlists);
 }
 
 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2301,7 +2233,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
-	GEM_BUG_ON(port_isset(execlists->port));
 
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
@@ -2519,15 +2450,29 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = intel_hws_preempt_address(request->engine);
+	*cs++ = 0;
+
+	return cs;
+}
+
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
 				  request->fence.seqno,
 				  request->timeline->hwsp_offset,
 				  0);
-
 	*cs++ = MI_USER_INTERRUPT;
+
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2548,9 +2493,10 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				    PIPE_CONTROL_FLUSH_ENABLE |
 				    PIPE_CONTROL_CS_STALL,
 				    0);
-
 	*cs++ = MI_USER_INTERRUPT;
+
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2599,8 +2545,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-	if (engine->preempt_context &&
-	    HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
@@ -2723,11 +2668,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 			i915_mmio_reg_offset(RING_ELSP(base));
 	}
 
-	execlists->preempt_complete_status = ~0u;
-	if (engine->preempt_context)
-		execlists->preempt_complete_status =
-			upper_32_bits(engine->preempt_context->lrc_desc);
-
 	execlists->csb_status =
 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
@@ -2739,7 +2679,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	else
 		execlists->csb_size = GEN11_CSB_ENTRIES;
 
-	reset_csb_pointers(execlists);
+	reset_csb_pointers(engine);
 
 	return 0;
 }
@@ -2922,11 +2862,6 @@ populate_lr_context(struct intel_context *ce,
 	if (!engine->default_state)
 		regs[CTX_CONTEXT_CONTROL + 1] |=
 			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-	if (ce->gem_context == engine->i915->preempt_context &&
-	    INTEL_GEN(engine->i915) < 11)
-		regs[CTX_CONTEXT_CONTROL + 1] |=
-			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
 	ret = 0;
 err_unpin_ctx:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4cbee4c206bd..acb853685f8d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1247,10 +1247,10 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	}
 }
 
-static void record_request(struct i915_request *request,
+static void record_request(const struct i915_request *request,
 			   struct drm_i915_error_request *erq)
 {
-	struct i915_gem_context *ctx = request->gem_context;
+	const struct i915_gem_context *ctx = request->gem_context;
 
 	erq->flags = request->fence.flags;
 	erq->context = request->fence.context;
@@ -1314,20 +1314,15 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	ee->num_requests = count;
 }
 
-static void error_record_engine_execlists(struct intel_engine_cs *engine,
+static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
-	unsigned int n;
+	struct i915_request * const *port = execlists->active;
+	unsigned int n = 0;
 
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq = port_request(&execlists->port[n]);
-
-		if (!rq)
-			break;
-
-		record_request(rq, &ee->execlist[n]);
-	}
+	while (*port)
+		record_request(*port++, &ee->execlist[n++]);
 
 	ee->num_ports = n;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6b27892ebba7..7d937a3522bb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -276,6 +276,12 @@ static bool i915_request_retire(struct i915_request *rq)
 
 	local_irq_disable();
 
+	/*
+	 * We only loosely track inflight requests across preemption,
+	 * and so we may find ourselves attempting to retire a _completed_
+	 * request that we have removed from the HW and put back on a run
+	 * queue.
+	 */
 	spin_lock(&rq->engine->active.lock);
 	list_del(&rq->sched.link);
 	spin_unlock(&rq->engine->active.lock);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index edbbdfec24ab..bebc1e9b4a5e 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,7 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
 
 #include "i915_gem.h"
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 2e9b38bdc33c..b1ba3e65cd52 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -179,8 +179,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static void kick_submission(struct intel_engine_cs *engine, int prio)
 {
-	const struct i915_request *inflight =
-		port_request(engine->execlists.port);
+	const struct i915_request *inflight = *engine->execlists.active;
 
 	/*
 	 * If we are already the currently executing context, don't
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 2987219a6300..4920ff9aba62 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -131,6 +131,18 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
+#define ptr_count_dec(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(--__v);					\
+} while (0)
+
+#define ptr_count_inc(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(++__v);					\
+} while (0)
+
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
 #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 928121f06054..6b6413d88b0a 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -32,7 +32,11 @@
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
 
-#define GUC_PREEMPT_FINISHED		0x1
+enum {
+	GUC_PREEMPT_NONE = 0,
+	GUC_PREEMPT_INPROGRESS,
+	GUC_PREEMPT_FINISHED,
+};
 #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
 #define GUC_PREEMPT_BREADCRUMB_BYTES	\
 	(sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
@@ -537,15 +541,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
-	spin_lock(&client->wq_lock);
-
 	guc_wq_item_append(client, engine->guc_id, ctx_desc,
 			   ring_tail, rq->fence.seqno);
 	guc_ring_doorbell(client);
 
 	client->submissions[engine->id] += 1;
-
-	spin_unlock(&client->wq_lock);
 }
 
 /*
@@ -631,8 +631,9 @@ static void inject_preempt_context(struct work_struct *work)
 	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 
 	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
-		execlists_clear_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_PREEMPT);
+		intel_write_status_page(engine,
+					I915_GEM_HWS_PREEMPT,
+					GUC_PREEMPT_NONE);
 		tasklet_schedule(&engine->execlists.tasklet);
 	}
 
@@ -672,8 +673,6 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
 
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
 	if (inject_preempt_hang(execlists))
 		return;
 
@@ -681,89 +680,90 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 	execlists_unwind_incomplete_requests(execlists);
 
 	wait_for_guc_preempt_report(engine);
-	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
+	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_NONE);
 }
 
-/**
- * guc_submit() - Submit commands through GuC
- * @engine: engine associated with the commands
- *
- * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
- */
-static void guc_submit(struct intel_engine_cs *engine)
+static void guc_submit(struct intel_engine_cs *engine,
+		       struct i915_request **out,
+		       struct i915_request **end)
 {
 	struct intel_guc *guc = &engine->i915->guc;
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	unsigned int n;
+	struct intel_guc_client *client = guc->execbuf_client;
 
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq;
-		unsigned int count;
+	spin_lock(&client->wq_lock);
 
-		rq = port_unpack(&port[n], &count);
-		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+	do {
+		struct i915_request *rq = *out++;
 
-			flush_ggtt_writes(rq->ring->vma);
+		flush_ggtt_writes(rq->ring->vma);
+		guc_add_request(guc, rq);
+	} while (out != end);
 
-			guc_add_request(guc, rq);
-		}
-	}
+	spin_unlock(&client->wq_lock);
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	GEM_BUG_ON(port_isset(port));
-
-	port_set(port, i915_request_get(rq));
+	return rq->sched.attr.priority | __NO_PREEMPTION;
 }
 
-static inline int rq_prio(const struct i915_request *rq)
+static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
-	return rq->sched.attr.priority;
+	trace_i915_request_in(rq, idx);
+
+	if (!rq->hw_context->inflight)
+		rq->hw_context->inflight = rq->engine;
+	intel_context_inflight_inc(rq->hw_context);
+
+	return i915_request_get(rq);
 }
 
-static inline int port_prio(const struct execlist_port *port)
+static void schedule_out(struct i915_request *rq)
 {
-	return rq_prio(port_request(port)) | __NO_PREEMPTION;
+	trace_i915_request_out(rq);
+
+	intel_context_inflight_dec(rq->hw_context);
+	if (!intel_context_inflight_count(rq->hw_context))
+		rq->hw_context->inflight = NULL;
+
+	i915_request_put(rq);
 }
 
-static bool __guc_dequeue(struct intel_engine_cs *engine)
+static void __guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *last = NULL;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
+	struct i915_request **first = execlists->inflight;
+	struct i915_request ** const last_port = first + execlists->port_mask;
+	struct i915_request *last = first[0];
+	struct i915_request **port;
 	bool submit = false;
 	struct rb_node *rb;
 
 	lockdep_assert_held(&engine->active.lock);
 
-	if (port_isset(port)) {
+	if (last) {
 		if (intel_engine_has_preemption(engine)) {
 			struct guc_preempt_work *preempt_work =
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (i915_scheduler_need_preempt(prio,
-							port_prio(port))) {
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_PREEMPT);
+			if (i915_scheduler_need_preempt(prio, rq_prio(last))) {
+				intel_write_status_page(engine,
+							I915_GEM_HWS_PREEMPT,
+							GUC_PREEMPT_INPROGRESS);
 				queue_work(engine->i915->guc.preempt_wq,
 					   &preempt_work->work);
-				return false;
+				return;
 			}
 		}
 
-		port++;
-		if (port_isset(port))
-			return false;
+		if (*++first)
+			return;
+
+		last = NULL;
 	}
-	GEM_BUG_ON(port_isset(port));
 
+	port = first;
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -774,18 +774,15 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 				if (port == last_port)
 					goto done;
 
-				if (submit)
-					port_assign(port, last);
+				*port = schedule_in(last,
+						    port - execlists->inflight);
 				port++;
 			}
 
 			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-
-			last = rq;
 			submit = true;
+			last = rq;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -794,58 +791,41 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 done:
 	execlists->queue_priority_hint =
 		rb ? to_priolist(rb)->priority : INT_MIN;
-	if (submit)
-		port_assign(port, last);
-	if (last)
-		execlists_user_begin(execlists, execlists->port);
-
-	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-		   !port_isset(execlists->port));
-
-	return submit;
-}
-
-static void guc_dequeue(struct intel_engine_cs *engine)
-{
-	if (__guc_dequeue(engine))
-		guc_submit(engine);
+	if (submit) {
+		*port = schedule_in(last, port - execlists->inflight);
+		*++port = NULL;
+		guc_submit(engine, first, port);
+	}
+	execlists->active = execlists->inflight;
 }
 
 static void guc_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *rq;
+	struct i915_request **port, *rq;
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	rq = port_request(port);
-	while (rq && i915_request_completed(rq)) {
-		trace_i915_request_out(rq);
-		i915_request_put(rq);
+	for (port = execlists->inflight; (rq = *port); port++) {
+		if (!i915_request_completed(rq))
+			break;
 
-		port = execlists_port_complete(execlists, port);
-		if (port_isset(port)) {
-			execlists_user_begin(execlists, port);
-			rq = port_request(port);
-		} else {
-			execlists_user_end(execlists);
-			rq = NULL;
-		}
+		schedule_out(rq);
+	}
+	if (port != execlists->inflight) {
+		int idx = port - execlists->inflight;
+		int rem = ARRAY_SIZE(execlists->inflight) - idx;
+		memmove(execlists->inflight, port, rem * sizeof(*port));
 	}
 
-	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
-	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
+	if (intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
 	    GUC_PREEMPT_FINISHED)
 		complete_preempt_context(engine);
 
-	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
-		guc_dequeue(engine);
+	if (!intel_read_status_page(engine, I915_GEM_HWS_PREEMPT))
+		__guc_dequeue(engine);
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -959,7 +939,6 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
-	GEM_BUG_ON(port_isset(execlists->port));
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -1422,7 +1401,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	 * and it is guaranteed that it will remove the work item from the
 	 * queue before our request is completed.
 	 */
-	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
+	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
 		     sizeof(struct guc_wq_item) *
 		     I915_NUM_ENGINES > GUC_WQ_SIZE);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 11278bac3a24..1bd1ee95cf8c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -366,13 +366,15 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 
 		if (!wait_event_timeout(wait->wait,
 					i915_sw_fence_done(wait),
-					HZ / 2)) {
+					5 * HZ)) {
 			struct i915_request *rq = requests[count - 1];
 
-			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
-			       count,
+			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
+			       atomic_read(&wait->pending), count,
 			       rq->fence.context, rq->fence.seqno,
 			       t->engine->name);
+			GEM_TRACE_DUMP();
+
 			i915_gem_set_wedged(t->engine->i915);
 			GEM_BUG_ON(!i915_request_completed(rq));
 			i915_sw_fence_wait(wait);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 6/8] drm/i915/execlists: Minimalistic timeslicing
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (4 preceding siblings ...)
  2019-06-12  9:31 ` [PATCH 5/8] drm/i915/execlists: Preempt-to-busy Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-12  9:31 ` [PATCH 7/8] drm/i915/execlists: Force preemption Chris Wilson
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

If we have multiple contexts of equal priority pending execution,
activate a timer to demote the currently executing context in favour of
the next in the queue when that timeslice expires. This enforces
fairness between contexts (so long as they allow preemption -- forced
preemption, in the future, will kick those who do not obey) and allows
us to avoid userspace blocking forward progress with e.g. unbounded
MI_SEMAPHORE_WAIT.

As a starting point, we use one jiffy as our timeslice so that we
remain reasonably efficient wrt frequent CPU wakeups.
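
As an illustrative aside (not part of the patch, and using invented
my_* names rather than the driver's own symbols), the mechanism boils
down to a per-engine timer that is armed for one jiffy when a rival of
equal priority is waiting; its callback does no work itself and merely
kicks the submission tasklet, which then rewinds the running context
and requeues it behind its peers. A minimal sketch, assuming only the
plain kernel timer/tasklet APIs:

#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/timer.h>

/* Hypothetical stand-in for the per-engine execlists state. */
struct my_engine {
	struct tasklet_struct tasklet;	/* submission bottom half */
	struct timer_list timer;	/* timeslice expiry */
};

/* Timer callback: defer all real work to the submission tasklet. */
static void my_timeslice_expired(struct timer_list *t)
{
	struct my_engine *engine = from_timer(engine, t, timer);

	tasklet_hi_schedule(&engine->tasklet);
}

static void my_engine_init_timeslice(struct my_engine *engine)
{
	timer_setup(&engine->timer, my_timeslice_expired, 0);
}

/* Arm a one-jiffy slice if an equal-priority rival is queued. */
static void my_set_timeslice(struct my_engine *engine)
{
	if (!timer_pending(&engine->timer))
		mod_timer(&engine->timer, jiffies + 1);
}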

Testcase: igt/gem_exec_scheduler/semaphore-resolve
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   6 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 111 +++++++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 223 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.c        |   1 +
 drivers/gpu/drm/i915/i915_scheduler_types.h  |   1 +
 5 files changed, 342 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index dd0082df42cc..11a25f060fed 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -12,6 +12,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/llist.h>
+#include <linux/timer.h>
 #include <linux/types.h>
 
 #include "i915_gem.h"
@@ -137,6 +138,11 @@ struct intel_engine_execlists {
 	 */
 	struct tasklet_struct tasklet;
 
+	/**
+	 * @timer: kick the current context if its timeslice expires
+	 */
+	struct timer_list timer;
+
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
 	 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 2b12ec9bc0d4..fa8d900da026 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -255,6 +255,7 @@ static int effective_prio(const struct i915_request *rq)
 		prio |= I915_PRIORITY_NOSEMAPHORE;
 
 	/* Restrict mere WAIT boosts from triggering preemption */
+	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
 	return prio | __NO_PREEMPTION;
 }
 
@@ -811,6 +812,81 @@ last_active(const struct intel_engine_execlists *execlists)
 	return *last;
 }
 
+static void
+defer_request(struct i915_request * const rq, struct list_head * const pl)
+{
+	struct i915_dependency *p;
+
+	/*
+	 * We want to move the interrupted request to the back of
+	 * the round-robin list (i.e. its priority level), but
+	 * in doing so, we must then move all requests that were in
+	 * flight and were waiting for the interrupted request to
+	 * be run after it again.
+	 */
+	list_move_tail(&rq->sched.link, pl);
+
+	list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+		struct i915_request *w =
+			container_of(p->waiter, typeof(*w), sched);
+
+		/* Leave semaphores spinning on the other engines */
+		if (w->engine != rq->engine)
+			continue;
+
+		/* No waiter should start before the active request completed */
+		GEM_BUG_ON(i915_request_started(w));
+
+		GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+		if (rq_prio(w) < rq_prio(rq))
+			continue;
+
+		if (list_empty(&w->sched.link))
+			continue; /* Not yet submitted; unready */
+
+		/*
+		 * This should be very shallow as it is limited by the
+		 * number of requests that can fit in a ring (<64) and
+		 * the number of contexts that can be in flight on this
+		 * engine.
+		 */
+		defer_request(w, pl);
+	}
+}
+
+static void defer_active(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = __unwind_incomplete_requests(engine);
+	if (!rq)
+		return;
+
+	defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+}
+
+static bool
+need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+	int hint;
+
+	if (list_is_last(&rq->sched.link, &engine->active.requests))
+		return false;
+
+	hint = max(rq_prio(list_next_entry(rq, sched.link)),
+		   engine->execlists.queue_priority_hint);
+
+	return hint >= rq_prio(rq);
+}
+
+static bool
+enable_timeslice(struct intel_engine_cs *engine)
+{
+	struct i915_request *last = last_active(&engine->execlists);
+
+	return last && need_timeslice(engine, last);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -904,6 +980,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 */
 			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
+		} else if (need_timeslice(engine, last) &&
+			   !timer_pending(&engine->execlists.timer)) {
+			GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n",
+				  engine->name,
+				  last->fence.context,
+				  last->fence.seqno,
+				  last->sched.attr.priority,
+				  execlists->queue_priority_hint);
+
+			ring_pause(engine) = 1;
+			defer_active(engine);
+
+			/*
+			 * Unlike for preemption, if we rewind and continue
+			 * executing the same context as previously active,
+			 * the order of execution will remain the same and
+			 * the tail will only advance. We do not need to
+			 * force a full context restore, as a lite-restore
+			 * is sufficient to resample the monotonic TAIL.
+			 */
+			last = NULL;
 		} else {
 			/*
 			 * Otherwise if we already have a request pending
@@ -1226,6 +1323,9 @@ static void process_csb(struct intel_engine_cs *engine)
 				       sizeof(*execlists->pending));
 			execlists->pending[0] = NULL;
 
+			if (enable_timeslice(engine))
+				mod_timer(&execlists->timer, jiffies + 1);
+
 			if (!inject_preempt_hang(execlists))
 				ring_pause(engine) = 0;
 		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
@@ -1296,6 +1396,15 @@ static void execlists_submission_tasklet(unsigned long data)
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
+static void execlists_submission_timer(struct timer_list *timer)
+{
+	struct intel_engine_cs *engine =
+		from_timer(engine, timer, execlists.timer);
+
+	/* Kick the tasklet for some interrupt coalescing and reset handling */
+	tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
@@ -2525,6 +2634,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 
 static void execlists_park(struct intel_engine_cs *engine)
 {
+	del_timer_sync(&engine->execlists.timer);
 	intel_engine_park(engine);
 }
 
@@ -2622,6 +2732,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
+	timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f0ca2a09dabd..9ba6bffff3e3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -79,6 +79,225 @@ static int live_sanitycheck(void *arg)
 	return err;
 }
 
+static int
+emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
+	*cs++ = 0;
+
+	if (idx > 0) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
+		*cs++ = 0;
+		*cs++ = 1;
+	} else {
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	intel_ring_advance(rq, cs);
+	return 0;
+}
+
+static struct i915_request *
+semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
+{
+	struct i915_gem_context *ctx;
+	struct i915_request *rq;
+	int err;
+
+	ctx = kernel_context(engine->i915);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq))
+		goto out_ctx;
+
+	err = emit_semaphore_chain(rq, vma, idx);
+	i915_request_add(rq);
+	if (err)
+		rq = ERR_PTR(err);
+
+out_ctx:
+	kernel_context_close(ctx);
+	return rq;
+}
+
+static int
+release_queue(struct intel_engine_cs *engine,
+	      struct i915_vma *vma,
+	      int idx)
+{
+	struct i915_sched_attr attr = {
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+	};
+	struct i915_request *rq;
+	u32 *cs;
+
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
+	*cs++ = 0;
+	*cs++ = 1;
+
+	intel_ring_advance(rq, cs);
+	i915_request_add(rq);
+
+	engine->schedule(rq, &attr);
+
+	return 0;
+}
+
+static int
+slice_semaphore_queue(struct intel_engine_cs *outer,
+		      struct i915_vma *vma,
+		      int count)
+{
+	struct intel_engine_cs *engine;
+	struct i915_request *head;
+	enum intel_engine_id id;
+	int err, i, n = 0;
+
+	head = semaphore_queue(outer, vma, n++);
+	if (IS_ERR(head))
+		return PTR_ERR(head);
+
+	i915_request_get(head);
+	for_each_engine(engine, outer->i915, id) {
+		for (i = 0; i < count; i++) {
+			struct i915_request *rq;
+
+			rq = semaphore_queue(engine, vma, n++);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto out;
+			}
+		}
+	}
+
+	err = release_queue(outer, vma, n);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(head,
+			      I915_WAIT_LOCKED,
+			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
+		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
+		       count, n);
+		GEM_TRACE_DUMP();
+		i915_gem_set_wedged(outer->i915);
+		err = -EIO;
+	}
+
+out:
+	i915_request_put(head);
+	return err;
+}
+
+static int live_timeslice_preempt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	void *vaddr;
+	int err = 0;
+	int count;
+
+	/*
+	 * If a request takes too long, we would like to give other users
+	 * a fair go on the GPU. In particular, users may create batches
+	 * that wait upon external input, where that input may even be
+	 * supplied by another GPU job. To avoid blocking forever, we
+	 * need to preempt the current task and replace it with another
+	 * ready task.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_unlock;
+	}
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		goto err_map;
+
+	for_each_prime_number_from(count, 1, 16) {
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		for_each_engine(engine, i915, id) {
+			memset(vaddr, 0, PAGE_SIZE);
+
+			err = slice_semaphore_queue(engine, vma, count);
+			if (err)
+				goto err_pin;
+
+			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+				err = -EIO;
+				goto err_pin;
+			}
+		}
+	}
+
+err_pin:
+	i915_vma_unpin(vma);
+err_map:
+	i915_gem_object_unpin_map(obj);
+err_obj:
+	i915_gem_object_put(obj);
+err_unlock:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return err;
+}
+
 static int live_busywait_preempt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -398,6 +617,9 @@ static int live_late_preempt(void *arg)
 	if (!ctx_lo)
 		goto err_ctx_hi;
 
+	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
+	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
+
 	for_each_engine(engine, i915, id) {
 		struct igt_live_test t;
 		struct i915_request *rq;
@@ -1818,6 +2040,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
+		SUBTEST(live_timeslice_preempt),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index b1ba3e65cd52..0bd452e851d8 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -394,6 +394,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		list_add(&dep->wait_link, &signal->waiters_list);
 		list_add(&dep->signal_link, &node->signalers_list);
 		dep->signaler = signal;
+		dep->waiter = node;
 		dep->flags = flags;
 
 		/* Keep track of whether anyone on this chain has a semaphore */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 3e309631bd0b..aad81acba9dc 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -62,6 +62,7 @@ struct i915_sched_node {
 
 struct i915_dependency {
 	struct i915_sched_node *signaler;
+	struct i915_sched_node *waiter;
 	struct list_head signal_link;
 	struct list_head wait_link;
 	struct list_head dfs_link;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 7/8] drm/i915/execlists: Force preemption
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (5 preceding siblings ...)
  2019-06-12  9:31 ` [PATCH 6/8] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-12  9:31 ` [PATCH 8/8] drm/i915: Add a label for config DRM_I915_SPIN_REQUEST Chris Wilson
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

If the preempted context takes too long to relinquish control, e.g. it
is stuck inside a shader with arbitration disabled, evict that context
with an engine reset. This ensures that preemptions are reasonably
responsive, providing a tighter QoS for the more important context at
the cost of flagging unresponsive contexts more frequently (i.e. instead
of using an ~10s hangcheck, we now evict at ~10ms). The challenge lies
in picking a timeout that can be reasonably serviced by HW for
typical workloads, balancing the existing clients against the needs for
responsiveness.
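
To make the shape of the change easier to follow, here is a hedged
sketch only (the my_* helpers are invented for illustration and are
not symbols from this patch): the timeout is armed when the preempting
request is submitted, cancelled once the hardware acknowledges the
context switch, and polled by the submission bottom half so that an
expired, still-outstanding preemption escalates to an engine reset.

#include <linux/jiffies.h>
#include <linux/timer.h>

/* Matches the CONFIG_DRM_I915_PREEMPT_TIMEOUT default added below. */
#define MY_PREEMPT_TIMEOUT_MS 10

/* Arm the timeout as the preempting request is written to the ELSP. */
static void my_arm_preempt_timeout(struct timer_list *timer)
{
	mod_timer(timer, jiffies + msecs_to_jiffies(MY_PREEMPT_TIMEOUT_MS));
}

/* Tear it down again once the preemption completes. */
static void my_clear_preempt_timeout(struct timer_list *timer)
{
	del_timer(timer);
}

/*
 * Checked from the submission tasklet: a preemption still outstanding
 * after the timer has fired is treated as stuck, and this patch then
 * falls back to an engine reset to evict the offending context.
 */
static bool my_preempt_timed_out(const struct timer_list *timer)
{
	return !timer_pending(timer);
}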

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile | 12 ++++++
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 56 ++++++++++++++++++++++++++--
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 4fd1ea639d0f..613b753cb27a 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,3 +25,15 @@ config DRM_I915_SPIN_REQUEST
 	  May be 0 to disable the initial spin. In practice, we estimate
 	  the cost of enabling the interrupt (if currently disabled) to be
 	  a few microseconds.
+
+config DRM_I915_PREEMPT_TIMEOUT
+	int "Preempt timeout (ms)"
+	default 10 # milliseconds
+	help
+	  How long to wait (in milliseconds) for a preemption event to occur
+	  when submitting a new context via execlists. If the current context
+	  does not hit an arbitration point and yield to HW before the timer
+	  expires, the HW will be reset to allow the more important context
+	  to execute.
+
+	  May be 0 to disable the timeout.
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fa8d900da026..f8cdca309cd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -887,6 +887,15 @@ enable_timeslice(struct intel_engine_cs *engine)
 	return last && need_timeslice(engine, last);
 }
 
+static unsigned long preempt_expires(void)
+{
+	unsigned long timeout =
+		msecs_to_jiffies_timeout(CONFIG_DRM_I915_PREEMPT_TIMEOUT);
+
+	barrier();
+	return jiffies + timeout;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1218,6 +1227,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		*port = execlists_schedule_in(last, port - execlists->pending);
 		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
+
+		if (CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+			mod_timer(&execlists->timer, preempt_expires());
 	}
 }
 
@@ -1373,13 +1385,48 @@ static void process_csb(struct intel_engine_cs *engine)
 	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
 }
 
-static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
+static bool __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
 
 	process_csb(engine);
-	if (!engine->execlists.pending[0])
+	if (!engine->execlists.pending[0]) {
 		execlists_dequeue(engine);
+		return true;
+	}
+
+	return false;
+}
+
+static void preempt_reset(struct intel_engine_cs *engine)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->i915->gpu_error.flags;
+
+	if (test_and_set_bit(bit, lock))
+		return;
+
+	tasklet_disable_nosync(&engine->execlists.tasklet);
+	spin_unlock(&engine->active.lock);
+
+	i915_reset_engine(engine, "preemption time out");
+
+	spin_lock(&engine->active.lock);
+	tasklet_enable(&engine->execlists.tasklet);
+
+	clear_bit(bit, lock);
+	wake_up_bit(lock, bit);
+}
+
+static bool preempt_timeout(struct intel_engine_cs *const engine)
+{
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return false;
+
+	if (!intel_engine_has_preemption(engine))
+		return false;
+
+	return !timer_pending(&engine->execlists.timer);
 }
 
 /*
@@ -1392,7 +1439,10 @@ static void execlists_submission_tasklet(unsigned long data)
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
-	__execlists_submission_tasklet(engine);
+
+	if (!__execlists_submission_tasklet(engine) && preempt_timeout(engine))
+		preempt_reset(engine);
+
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 8/8] drm/i915: Add a label for config DRM_I915_SPIN_REQUEST
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (6 preceding siblings ...)
  2019-06-12  9:31 ` [PATCH 7/8] drm/i915/execlists: Force preemption Chris Wilson
@ 2019-06-12  9:31 ` Chris Wilson
  2019-06-12  9:53 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Patchwork
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12  9:31 UTC (permalink / raw)
  To: intel-gfx

If we don't give it a label, it does not appear as a configuration
option.
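
As a brief Kconfig illustration (MY_EXAMPLE_TUNABLE is a made-up
symbol, not the option touched here): an int option without a quoted
prompt string never shows up in menuconfig and silently keeps its
default, whereas adding the prompt turns it into a user-visible,
editable entry.

config MY_EXAMPLE_TUNABLE
	int
	default 5
	help
	  No prompt string after "int", so this is invisible in
	  menuconfig and always takes its default value.

config MY_EXAMPLE_TUNABLE_VISIBLE
	int "My example tunable (us)"
	default 5
	help
	  The quoted prompt makes the option appear in menuconfig,
	  where its value can be overridden.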

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 613b753cb27a..8273d3baafe4 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -13,7 +13,7 @@ config DRM_I915_USERFAULT_AUTOSUSPEND
 	  runtime pm autosuspend delay tunable.
 
 config DRM_I915_SPIN_REQUEST
-	int
+	int "Busywait for request completion (us)"
 	default 5 # microseconds
 	help
 	  Before sleeping waiting for a request (GPU operation) to complete,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (7 preceding siblings ...)
  2019-06-12  9:31 ` [PATCH 8/8] drm/i915: Add a label for config DRM_I915_SPIN_REQUEST Chris Wilson
@ 2019-06-12  9:53 ` Patchwork
  2019-06-12  9:57 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-12  9:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
URL   : https://patchwork.freedesktop.org/series/61946/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
07bc4de0ab32 drm/i915: Keep contexts pinned until after the next kernel context switch
e1f10e9ec8a6 drm/i915: Stop retiring along engine
4740389e1a7a drm/i915: Replace engine->timeline with a plain list
-:180: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#180: FILE: drivers/gpu/drm/i915/gt/intel_engine_types.h:292:
+		spinlock_t lock;

total: 0 errors, 0 warnings, 1 checks, 968 lines checked
0a6152d74ef2 drm/i915: Flush the execution-callbacks on retiring
2286206e3ffc drm/i915/execlists: Preempt-to-busy
-:1494: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p_ptr' - possible side-effects?
#1494: FILE: drivers/gpu/drm/i915/i915_utils.h:134:
+#define ptr_count_dec(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(--__v);					\
+} while (0)

-:1500: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p_ptr' - possible side-effects?
#1500: FILE: drivers/gpu/drm/i915/i915_utils.h:140:
+#define ptr_count_inc(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(++__v);					\
+} while (0)

-:1783: WARNING:LINE_SPACING: Missing a blank line after declarations
#1783: FILE: drivers/gpu/drm/i915/intel_guc_submission.c:820:
+		int rem = ARRAY_SIZE(execlists->inflight) - idx;
+		memmove(execlists->inflight, port, rem * sizeof(*port));

total: 0 errors, 1 warnings, 2 checks, 1682 lines checked
090ff087a41e drm/i915/execlists: Minimalistic timeslicing
-:345: WARNING:LONG_LINE: line over 100 characters
#345: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:211:
+			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {

total: 0 errors, 1 warnings, 0 checks, 426 lines checked
65ce5d40d6da drm/i915/execlists: Force preemption
1915c21b2186 drm/i915: Add a label for config DRM_I915_SPIN_REQUEST

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (8 preceding siblings ...)
  2019-06-12  9:53 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Patchwork
@ 2019-06-12  9:57 ` Patchwork
  2019-06-12 10:16 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-12  9:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
URL   : https://patchwork.freedesktop.org/series/61946/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Keep contexts pinned until after the next kernel context switch
Okay!

Commit: drm/i915: Stop retiring along engine
Okay!

Commit: drm/i915: Replace engine->timeline with a plain list
Okay!

Commit: drm/i915: Flush the execution-callbacks on retiring
Okay!

Commit: drm/i915/execlists: Preempt-to-busy
-drivers/gpu/drm/i915/selftests/../i915_utils.h:220:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_utils.h:232:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Minimalistic timeslicing
+drivers/gpu/drm/i915/gt/intel_lrc.c:876:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gt/intel_lrc.c:876:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Force preemption
+
+drivers/gpu/drm/i915/i915_utils.h:232:16: warning: expression using sizeof(void)
+Error in reading or end of file.

Commit: drm/i915: Add a label for config DRM_I915_SPIN_REQUEST
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (9 preceding siblings ...)
  2019-06-12  9:57 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-06-12 10:16 ` Patchwork
  2019-06-12 15:29 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2) Patchwork
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-12 10:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
URL   : https://patchwork.freedesktop.org/series/61946/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6244 -> Patchwork_13250
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/

Known issues
------------

  Here are the changes found in Patchwork_13250 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@read_all_entries:
    - fi-cml-u:           [PASS][1] -> [INCOMPLETE][2] ([fdo#110566])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-cml-u/igt@debugfs_test@read_all_entries.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-cml-u/igt@debugfs_test@read_all_entries.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][3] -> [INCOMPLETE][4] ([fdo#107718])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][5] -> [FAIL][6] ([fdo#108511])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live_blt:
    - fi-skl-iommu:       [PASS][7] -> [INCOMPLETE][8] ([fdo#108602])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-skl-iommu/igt@i915_selftest@live_blt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-skl-iommu/igt@i915_selftest@live_blt.html

  * igt@i915_selftest@live_hangcheck:
    - fi-icl-y:           [PASS][9] -> [INCOMPLETE][10] ([fdo#107713] / [fdo#108569])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-icl-y/igt@i915_selftest@live_hangcheck.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-icl-y/igt@i915_selftest@live_hangcheck.html

  * igt@i915_selftest@live_sanitycheck:
    - fi-icl-u3:          [PASS][11] -> [DMESG-WARN][12] ([fdo#107724])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-icl-u3/igt@i915_selftest@live_sanitycheck.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-icl-u3/igt@i915_selftest@live_sanitycheck.html

  
#### Possible fixes ####

  * igt@core_auth@basic-auth:
    - fi-icl-u3:          [DMESG-WARN][13] ([fdo#107724]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/fi-icl-u3/igt@core_auth@basic-auth.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/fi-icl-u3/igt@core_auth@basic-auth.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#110566]: https://bugs.freedesktop.org/show_bug.cgi?id=110566


Participating hosts (51 -> 45)
------------------------------

  Additional (1): fi-bwr-2160 
  Missing    (7): fi-cml-u2 fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6244 -> Patchwork_13250

  CI_DRM_6244: d7ce900dafd424be9da576fbf3155b43ce5270ec @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5052: ff711b343c06a25ac4995ab8bd9a8bcb5ce1eb10 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13250: 1915c21b218609f12edb1f1ac752b3e81fc084c8 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

1915c21b2186 drm/i915: Add a label for config DRM_I915_SPIN_REQUEST
65ce5d40d6da drm/i915/execlists: Force preemption
090ff087a41e drm/i915/execlists: Minimalistic timeslicing
2286206e3ffc drm/i915/execlists: Preempt-to-busy
0a6152d74ef2 drm/i915: Flush the execution-callbacks on retiring
4740389e1a7a drm/i915: Replace engine->timeline with a plain list
e1f10e9ec8a6 drm/i915: Stop retiring along engine
07bc4de0ab32 drm/i915: Keep contexts pinned until after the next kernel context switch

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 ` [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
@ 2019-06-12 13:29   ` Mika Kuoppala
  2019-06-12 13:42     ` Chris Wilson
  2019-06-12 14:26   ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-12 13:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We need to keep the context image pinned in memory until after the GPU
> has finished writing into it. Since it continues to write as we signal
> the final breadcrumb, we need to keep it pinned until the request after
> it is complete. Currently we know the order in which requests execute on
> each engine, and so to remove that presumption we need to identify a
> request/context-switch we know must occur after our completion. Any
> request queued after the signal must imply a context switch, for
> simplicity we use a fresh request from the kernel context.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
>  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
>  drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 20 ++++-
>  drivers/gpu/drm/i915/gt/intel_context.c       | 80 ++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
>  drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
>  drivers/gpu/drm/i915/i915_active.c            | 80 ++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_active.h            |  5 ++
>  drivers/gpu/drm/i915/i915_active_types.h      |  3 +
>  drivers/gpu/drm/i915/i915_gem.c               |  4 -
>  drivers/gpu/drm/i915/i915_request.c           | 15 ----
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
>  19 files changed, 214 insertions(+), 185 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index c86ca9f21532..6200060aef05 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
> -	for_each_engine(engine, dev_priv, id)
> -		intel_engine_lost_context(engine);
> -}
> -
>  void i915_gem_contexts_fini(struct drm_i915_private *i915)
>  {
>  	lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>  	if (ret)
>  		goto out_add;
>  
> -	ret = gen8_emit_rpcs_config(rq, ce, sseu);
> -	if (ret)
> -		goto out_add;
> -
>  	/*
>  	 * Guarantee context image and the timeline remains pinned until the
>  	 * modifying request is retired by setting the ce activity tracker.
> @@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>  	 * But we only need to take one pin on the account of it. Or in other
>  	 * words transfer the pinned ce object to tracked active request.
>  	 */
> -	if (!i915_active_request_isset(&ce->active_tracker))
> -		__intel_context_pin(ce);
> -	__i915_active_request_set(&ce->active_tracker, rq);
> +	GEM_BUG_ON(i915_active_is_idle(&ce->active));
> +	ret = i915_active_ref(&ce->active, rq->fence.context, rq);


Why is this function the place to keep the context alive?

In other words, if the sseu state is not changed, we bail out early
and don't set up the tracker, and thus break the promise of keeping it alive.

> +	if (ret)
> +		goto out_add;
> +
> +	ret = gen8_emit_rpcs_config(rq, ce, sseu);
>  
>  out_add:
>  	i915_request_add(rq);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 630392c77e48..9691dd062f72 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
>  
>  /* i915_gem_context.c */
>  int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
> -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
>  void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
>  
>  int i915_gem_context_open(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index f40f13c0b8b7..59b6d45b1936 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -10,6 +10,22 @@
>  #include "i915_drv.h"
>  #include "i915_globals.h"
>  
> +static void call_idle_barriers(struct intel_engine_cs *engine)
> +{
> +	struct llist_node *node, *next;
> +
> +	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
> +		struct i915_active_request *active =
> +			container_of((struct list_head *)node,
> +				     typeof(*active), link);
> +
> +		INIT_LIST_HEAD(&active->link);
> +		RCU_INIT_POINTER(active->request, NULL);
> +
> +		active->retire(active, NULL);
> +	}
> +}
> +
>  static void i915_gem_park(struct drm_i915_private *i915)
>  {
>  	struct intel_engine_cs *engine;
> @@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915)
>  
>  	lockdep_assert_held(&i915->drm.struct_mutex);
>  
> -	for_each_engine(engine, i915, id)
> +	for_each_engine(engine, i915, id) {
> +		call_idle_barriers(engine); /* cleanup after wedging */
>  		i915_gem_batch_pool_fini(&engine->batch_pool);
> +	}
>  
>  	i915_timelines_park(i915);
>  	i915_vma_parked(i915);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index c78ec0b58e77..c10eb4904264 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
>  
>  		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
>  
> -		intel_context_get(ce);
>  		smp_mb__before_atomic(); /* flush pin before it is visible */
>  	}
>  
> @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
>  		ce->ops->unpin(ce);
>  
>  		i915_gem_context_put(ce->gem_context);
> -		intel_context_put(ce);
> +		intel_context_inactive(ce);
>  	}
>  
>  	mutex_unlock(&ce->pin_mutex);
>  	intel_context_put(ce);
>  }
>  
> -static void intel_context_retire(struct i915_active_request *active,
> -				 struct i915_request *rq)
> +static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
>  {
> -	struct intel_context *ce =
> -		container_of(active, typeof(*ce), active_tracker);
> +	int err;
>  
> -	intel_context_unpin(ce);
> +	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * And mark it as a globally pinned object to let the shrinker know
> +	 * it cannot reclaim the object until we release it.
> +	 */
> +	vma->obj->pin_global++;
> +	vma->obj->mm.dirty = true;
> +
> +	return 0;
> +}
> +
> +static void __context_unpin_state(struct i915_vma *vma)
> +{
> +	vma->obj->pin_global--;
> +	__i915_vma_unpin(vma);
> +}
> +
> +static void intel_context_retire(struct i915_active *active)
> +{
> +	struct intel_context *ce = container_of(active, typeof(*ce), active);
> +
> +	if (ce->state)
> +		__context_unpin_state(ce->state);
> +
> +	intel_context_put(ce);
>  }
>  
>  void
> @@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,
>  
>  	mutex_init(&ce->pin_mutex);
>  
> -	i915_active_request_init(&ce->active_tracker,
> -				 NULL, intel_context_retire);
> +	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
> +}
> +
> +int intel_context_active(struct intel_context *ce, unsigned long flags)


I can digest this, but I was missing a verb in it and thought of
intel_context_activate|deactivate.

> +{
> +	int err;
> +
> +	if (!i915_active_acquire(&ce->active))
> +		return 0;
> +
> +	intel_context_get(ce);
> +
> +	if (!ce->state)
> +		return 0;
> +
> +	err = __context_pin_state(ce->state, flags);
> +	if (err) {
> +		i915_active_cancel(&ce->active);
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	/* Preallocate tracking nodes */
> +	if (!i915_gem_context_is_kernel(ce->gem_context)) {
> +		err = i915_active_acquire_preallocate_barrier(&ce->active,
> +							      ce->engine);
> +		if (err) {
> +			i915_active_release(&ce->active);
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void intel_context_inactive(struct intel_context *ce)
> +{
> +	/* Nodes preallocated in intel_context_active() */
> +	i915_active_acquire_barrier(&ce->active);
> +	i915_active_release(&ce->active);
>  }
>  
>  static void i915_global_context_shrink(void)
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 6d5453ba2c1e..4de4ba2df7d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
>  		ce->ops->exit(ce);
>  }
>  
> +int intel_context_active(struct intel_context *ce, unsigned long flags);
> +void intel_context_inactive(struct intel_context *ce);
> +
>  static inline struct intel_context *intel_context_get(struct intel_context *ce)
>  {
>  	kref_get(&ce->ref);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 825fcf0ac9c4..e95be4be9612 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -56,10 +56,10 @@ struct intel_context {
>  	intel_engine_mask_t saturated; /* submitting semaphores too late? */
>  
>  	/**
> -	 * active_tracker: Active tracker for the external rq activity
> -	 * on this intel_context object.
> +	 * active: Active tracker for the rq activity (inc. external) on this
> +	 * intel_context object.
>  	 */
> -	struct i915_active_request active_tracker;
> +	struct i915_active active;
>  
>  	const struct intel_context_ops *ops;
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 201bbd2a4faf..b9fd88f21609 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>  
> -void intel_engine_lost_context(struct intel_engine_cs *engine);
> -
>  void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index c0d986db5a75..5a08036ae774 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>  {
>  	int err;
>  
> +	init_llist_head(&engine->barrier_tasks);
> +
>  	err = init_status_page(engine);
>  	if (err)
>  		return err;
> @@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  	if (engine->preempt_context)
>  		intel_context_unpin(engine->preempt_context);
>  	intel_context_unpin(engine->kernel_context);
> +	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
>  
>  	i915_timeline_fini(&engine->timeline);
>  
> @@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>  		engine->set_default_submission(engine);
>  }
>  
> -/**
> - * intel_engine_lost_context: called when the GPU is reset into unknown state
> - * @engine: the engine
> - *
> - * We have either reset the GPU or otherwise about to lose state tracking of
> - * the current GPU logical state (e.g. suspend). On next use, it is therefore
> - * imperative that we make no presumptions about the current state and load
> - * from scratch.
> - */
> -void intel_engine_lost_context(struct intel_engine_cs *engine)
> -{
> -	struct intel_context *ce;
> -
> -	lockdep_assert_held(&engine->i915->drm.struct_mutex);
> -
> -	ce = fetch_and_zero(&engine->last_retired_context);
> -	if (ce)
> -		intel_context_unpin(ce);
> -}
> -
>  bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
>  {
>  	switch (INTEL_GEN(engine->i915)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index ccf034764741..3c448a061abd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>  
>  	/* Check again on the next retirement. */
>  	engine->wakeref_serial = engine->serial + 1;
> +
> +	i915_request_add_barriers(rq);
>  	__i915_request_commit(rq);
>  
>  	return false;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 01223864237a..33a31aa2d2ae 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -11,6 +11,7 @@
>  #include <linux/irq_work.h>
>  #include <linux/kref.h>
>  #include <linux/list.h>
> +#include <linux/llist.h>
>  #include <linux/types.h>
>  
>  #include "i915_gem.h"
> @@ -288,6 +289,7 @@ struct intel_engine_cs {
>  	struct intel_ring *buffer;
>  
>  	struct i915_timeline timeline;
> +	struct llist_head barrier_tasks;
>  
>  	struct intel_context *kernel_context; /* pinned */
>  	struct intel_context *preempt_context; /* pinned; optional */
> @@ -435,17 +437,6 @@ struct intel_engine_cs {
>  
>  	struct intel_engine_execlists execlists;
>  
> -	/* Contexts are pinned whilst they are active on the GPU. The last
> -	 * context executed remains active whilst the GPU is idle - the
> -	 * switch away and write to the context object only occurs on the
> -	 * next execution.  Contexts are only unpinned on retirement of the
> -	 * following request ensuring that we can always write to the object
> -	 * on the context switch even after idling. Across suspend, we switch
> -	 * to the kernel context and trash it as the save may not happen
> -	 * before the hardware is powered down.
> -	 */
> -	struct intel_context *last_retired_context;
> -
>  	/* status_notifier: list of callbacks for context-switch changes */
>  	struct atomic_notifier_head context_status_notifier;
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index b8f5592da18f..05524489615c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
>  	intel_context_free(ce);
>  }
>  
> -static int __context_pin(struct i915_vma *vma)
> -{
> -	unsigned int flags;
> -	int err;
> -
> -	flags = PIN_GLOBAL | PIN_HIGH;
> -	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> -
> -	err = i915_vma_pin(vma, 0, 0, flags);
> -	if (err)
> -		return err;
> -
> -	vma->obj->pin_global++;
> -	vma->obj->mm.dirty = true;
> -
> -	return 0;
> -}
> -
> -static void __context_unpin(struct i915_vma *vma)
> -{
> -	vma->obj->pin_global--;
> -	__i915_vma_unpin(vma);
> -}
> -
>  static void execlists_context_unpin(struct intel_context *ce)
>  {
> -	struct intel_engine_cs *engine;
> -
> -	/*
> -	 * The tasklet may still be using a pointer to our state, via an
> -	 * old request. However, since we know we only unpin the context
> -	 * on retirement of the following request, we know that the last
> -	 * request referencing us will have had a completion CS interrupt.
> -	 * If we see that it is still active, it means that the tasklet hasn't
> -	 * had the chance to run yet; let it run before we teardown the
> -	 * reference it may use.
> -	 */
> -	engine = READ_ONCE(ce->inflight);
> -	if (unlikely(engine)) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&engine->timeline.lock, flags);
> -		process_csb(engine);
> -		spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -
> -		GEM_BUG_ON(READ_ONCE(ce->inflight));
> -	}
> -
>  	i915_gem_context_unpin_hw_id(ce->gem_context);
> -
> -	intel_ring_unpin(ce->ring);
> -
>  	i915_gem_object_unpin_map(ce->state->obj);
> -	__context_unpin(ce->state);
> +	intel_ring_unpin(ce->ring);
>  }
>  
>  static void
> @@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce,
>  		goto err;
>  	GEM_BUG_ON(!ce->state);
>  
> -	ret = __context_pin(ce->state);
> +	ret = intel_context_active(ce,
> +				   engine->i915->ggtt.pin_bias |
> +				   PIN_OFFSET_BIAS |
> +				   PIN_HIGH);
>  	if (ret)
>  		goto err;
>  
> @@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
>  					I915_MAP_OVERRIDE);
>  	if (IS_ERR(vaddr)) {
>  		ret = PTR_ERR(vaddr);
> -		goto unpin_vma;
> +		goto unpin_active;
>  	}
>  
>  	ret = intel_ring_pin(ce->ring);
> @@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce,
>  	intel_ring_unpin(ce->ring);
>  unpin_map:
>  	i915_gem_object_unpin_map(ce->state->obj);
> -unpin_vma:
> -	__context_unpin(ce->state);
> +unpin_active:
> +	intel_context_inactive(ce);
>  err:
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index c834d016c965..7ab28b6f62a1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
>  		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
>  }
>  
> -static int __context_pin(struct intel_context *ce)
> -{
> -	struct i915_vma *vma;
> -	int err;
> -
> -	vma = ce->state;
> -	if (!vma)
> -		return 0;
> -
> -	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> -	if (err)
> -		return err;
> -
> -	/*
> -	 * And mark is as a globally pinned object to let the shrinker know
> -	 * it cannot reclaim the object until we release it.
> -	 */
> -	vma->obj->pin_global++;
> -	vma->obj->mm.dirty = true;
> -
> -	return 0;
> -}
> -
> -static void __context_unpin(struct intel_context *ce)
> -{
> -	struct i915_vma *vma;
> -
> -	vma = ce->state;
> -	if (!vma)
> -		return;
> -
> -	vma->obj->pin_global--;
> -	i915_vma_unpin(vma);
> -}
> -
>  static void ring_context_unpin(struct intel_context *ce)
>  {
>  	__context_unpin_ppgtt(ce->gem_context);
> -	__context_unpin(ce);
>  }
>  
>  static struct i915_vma *
> @@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce)
>  		ce->state = vma;
>  	}
>  
> -	err = __context_pin(ce);
> +	err = intel_context_active(ce, PIN_HIGH);
>  	if (err)
>  		return err;
>  
>  	err = __context_pin_ppgtt(ce->gem_context);
>  	if (err)
> -		goto err_unpin;
> +		goto err_active;
>  
>  	return 0;
>  
> -err_unpin:
> -	__context_unpin(ce);
> +err_active:
> +	intel_context_inactive(ce);
>  	return err;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 6d7562769eb2..b7675ef18523 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
>  
>  static int mock_context_pin(struct intel_context *ce)
>  {
> +	int ret;
> +
>  	if (!ce->ring) {
>  		ce->ring = mock_ring(ce->engine);
>  		if (!ce->ring)
>  			return -ENOMEM;
>  	}
>  
> +	ret = intel_context_active(ce, PIN_HIGH);
> +	if (ret)
> +		return ret;
> +
>  	mock_timeline_pin(ce->ring->timeline);
>  	return 0;
>  }
> @@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
>  {
>  	struct mock_engine *mock =
>  		container_of(engine, typeof(*mock), base);
> -	struct intel_context *ce;
>  
>  	GEM_BUG_ON(timer_pending(&mock->hw_delay));
>  
> -	ce = fetch_and_zero(&engine->last_retired_context);
> -	if (ce)
> -		intel_context_unpin(ce);
> -
>  	intel_context_unpin(engine->kernel_context);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 863ae12707ba..100e40afc9d6 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -100,7 +100,7 @@ active_instance(struct i915_active *ref, u64 idx)
>  		parent = *p;
>  
>  		node = rb_entry(parent, struct active_node, node);
> -		if (node->timeline == idx)
> +		if (node->timeline == idx && !IS_ERR(node->base.request))

Is this a related change?

-Mika

>  			goto replace;
>  
>  		if (node->timeline < idx)
> @@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
>  	ref->retire = retire;
>  	ref->tree = RB_ROOT;
>  	i915_active_request_init(&ref->last, NULL, last_retire);
> +	init_llist_head(&ref->barriers);
>  	ref->count = 0;
>  }
>  
> @@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref)
>  }
>  #endif
>  
> +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> +					    struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *i915 = engine->i915;
> +	unsigned long tmp;
> +	int err = 0;
> +
> +	GEM_BUG_ON(!engine->mask);
> +	for_each_engine_masked(engine, i915, engine->mask, tmp) {
> +		struct intel_context *kctx = engine->kernel_context;
> +		struct active_node *node;
> +
> +		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
> +		if (unlikely(!node)) {
> +			err = -ENOMEM;
> +			break;
> +		}
> +
> +		i915_active_request_init(&node->base,
> +					 (void *)engine, node_retire);
> +		node->timeline = kctx->ring->timeline->fence_context;
> +		node->ref = ref;
> +		ref->count++;
> +
> +		llist_add((struct llist_node *)&node->base.link,
> +			  &ref->barriers);
> +	}
> +
> +	return err;
> +}
> +
> +void i915_active_acquire_barrier(struct i915_active *ref)
> +{
> +	struct llist_node *pos, *next;
> +
> +	i915_active_acquire(ref);
> +
> +	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
> +		struct intel_engine_cs *engine;
> +		struct active_node *node;
> +		struct rb_node **p, *parent;
> +
> +		node = container_of((struct list_head *)pos,
> +				    typeof(*node), base.link);
> +
> +		engine = (void *)rcu_access_pointer(node->base.request);
> +		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
> +
> +		parent = NULL;
> +		p = &ref->tree.rb_node;
> +		while (*p) {
> +			parent = *p;
> +			if (rb_entry(parent,
> +				     struct active_node,
> +				     node)->timeline < node->timeline)
> +				p = &parent->rb_right;
> +			else
> +				p = &parent->rb_left;
> +		}
> +		rb_link_node(&node->node, parent, p);
> +		rb_insert_color(&node->node, &ref->tree);
> +
> +		llist_add((struct llist_node *)&node->base.link,
> +			  &engine->barrier_tasks);
> +	}
> +	i915_active_release(ref);
> +}
> +
> +void i915_request_add_barriers(struct i915_request *rq)
> +{
> +	struct intel_engine_cs *engine = rq->engine;
> +	struct llist_node *node, *next;
> +
> +	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
> +		list_add_tail((struct list_head *)node, &rq->active_list);
> +}
> +
>  int i915_active_request_set(struct i915_active_request *active,
>  			    struct i915_request *rq)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 7d758719ce39..d55d37673944 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
>  static inline void i915_active_fini(struct i915_active *ref) { }
>  #endif
>  
> +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> +					    struct intel_engine_cs *engine);
> +void i915_active_acquire_barrier(struct i915_active *ref);
> +void i915_request_add_barriers(struct i915_request *rq);
> +
>  #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index b679253b53a5..c025991b9233 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -7,6 +7,7 @@
>  #ifndef _I915_ACTIVE_TYPES_H_
>  #define _I915_ACTIVE_TYPES_H_
>  
> +#include <linux/llist.h>
>  #include <linux/rbtree.h>
>  #include <linux/rcupdate.h>
>  
> @@ -31,6 +32,8 @@ struct i915_active {
>  	unsigned int count;
>  
>  	void (*retire)(struct i915_active *ref);
> +
> +	struct llist_head barriers;
>  };
>  
>  #endif /* _I915_ACTIVE_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e980c1ee3dcf..0663f2df65d6 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  
>  	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>  	intel_runtime_pm_put(i915, wakeref);
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	i915_gem_contexts_lost(i915);
> -	mutex_unlock(&i915->drm.struct_mutex);
>  }
>  
>  void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index e9b59eea4f10..9eff9de7fa10 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
>  	spin_unlock(&rq->lock);
>  
>  	local_irq_enable();
> -
> -	/*
> -	 * The backing object for the context is done after switching to the
> -	 * *next* context. Therefore we cannot retire the previous context until
> -	 * the next context has already started running. However, since we
> -	 * cannot take the required locks at i915_request_submit() we
> -	 * defer the unpinning of the active context to now, retirement of
> -	 * the subsequent request.
> -	 */
> -	if (engine->last_retired_context)
> -		intel_context_unpin(engine->last_retired_context);
> -	engine->last_retired_context = rq->hw_context;
>  }
>  
>  static void __retire_engine_upto(struct intel_engine_cs *engine,
> @@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>  
>  	rq->infix = rq->ring->emit; /* end of header; start of user payload */
>  
> -	/* Keep a second pin for the dual retirement along engine and ring */
> -	__intel_context_pin(ce);
> -
>  	intel_context_mark_active(ce);
>  	return rq;
>  
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index b7f3fbb4ae89..a96d0c012d46 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  	mock_device_flush(i915);
> -	i915_gem_contexts_lost(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	flush_work(&i915->gem.idle_work);
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12 13:29   ` Mika Kuoppala
@ 2019-06-12 13:42     ` Chris Wilson
  2019-06-12 14:09       ` Mika Kuoppala
  0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2019-06-12 13:42 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-06-12 14:29:48)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > We need to keep the context image pinned in memory until after the GPU
> > has finished writing into it. Since it continues to write as we signal
> > the final breadcrumb, we need to keep it pinned until the request after
> > it is complete. Currently we know the order in which requests execute on
> > each engine, and so to remove that presumption we need to identify a
> > request/context-switch we know must occur after our completion. Any
> > request queued after the signal must imply a context switch, for
> > simplicity we use a fresh request from the kernel context.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
> >  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
> >  drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 20 ++++-
> >  drivers/gpu/drm/i915/gt/intel_context.c       | 80 ++++++++++++++++---
> >  drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
> >  drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
> >  drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
> >  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
> >  drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
> >  drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
> >  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
> >  drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
> >  drivers/gpu/drm/i915/i915_active.c            | 80 ++++++++++++++++++-
> >  drivers/gpu/drm/i915/i915_active.h            |  5 ++
> >  drivers/gpu/drm/i915/i915_active_types.h      |  3 +
> >  drivers/gpu/drm/i915/i915_gem.c               |  4 -
> >  drivers/gpu/drm/i915/i915_request.c           | 15 ----
> >  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
> >  19 files changed, 214 insertions(+), 185 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index c86ca9f21532..6200060aef05 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> >       return 0;
> >  }
> >  
> > -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
> > -{
> > -     struct intel_engine_cs *engine;
> > -     enum intel_engine_id id;
> > -
> > -     lockdep_assert_held(&dev_priv->drm.struct_mutex);
> > -
> > -     for_each_engine(engine, dev_priv, id)
> > -             intel_engine_lost_context(engine);
> > -}
> > -
> >  void i915_gem_contexts_fini(struct drm_i915_private *i915)
> >  {
> >       lockdep_assert_held(&i915->drm.struct_mutex);
> > @@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
> >       if (ret)
> >               goto out_add;
> >  
> > -     ret = gen8_emit_rpcs_config(rq, ce, sseu);
> > -     if (ret)
> > -             goto out_add;
> > -
> >       /*
> >        * Guarantee context image and the timeline remains pinned until the
> >        * modifying request is retired by setting the ce activity tracker.
> > @@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
> >        * But we only need to take one pin on the account of it. Or in other
> >        * words transfer the pinned ce object to tracked active request.
> >        */
> > -     if (!i915_active_request_isset(&ce->active_tracker))
> > -             __intel_context_pin(ce);
> > -     __i915_active_request_set(&ce->active_tracker, rq);
> > +     GEM_BUG_ON(i915_active_is_idle(&ce->active));
> > +     ret = i915_active_ref(&ce->active, rq->fence.context, rq);
> 
> 
> Why is this function the place to keep the context alive?

This is a special case where we have one context (the kernel context)
writing into the context state object of another. To keep the target
context state pinned, we mark the entire context as active.
 
> In other words, if the sseu state is not changed, we bail out early
> and don't set up the tracker, and thus break the promise of keeping it alive.

As we don't need to keep it alive for an access that never happened.
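
For what it's worth, here is a rough, standalone sketch of that rule, i.e.
the pin on the context image is transferred onto the activity tracker and
is only taken when a modifying request is actually emitted. None of this
is the i915 code; every name below (struct ctx, active_ref, modify_sseu,
and so on) is invented purely for illustration:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct ctx {
	int pin_count;	/* >0 means the context image must stay resident */
	int active;	/* requests still writing into the image */
};

static void active_ref(struct ctx *ce)
{
	if (ce->active++ == 0)
		ce->pin_count++;	/* first tracked request takes the pin */
}

static void active_retire(struct ctx *ce)
{
	assert(ce->active > 0);
	if (--ce->active == 0)
		ce->pin_count--;	/* last write retired, image may move */
}

static bool modify_sseu(struct ctx *ce, int cur, int want)
{
	if (cur == want)
		return false;	/* nothing emitted, no need to keep it alive */

	active_ref(ce);		/* keep the image pinned for the pending write */
	/* ... emit the modifying request here ... */
	return true;
}

int main(void)
{
	struct ctx ce = { 0, 0 };

	modify_sseu(&ce, 4, 4);		/* unchanged: never takes the pin */
	printf("after no-op:  pin=%d\n", ce.pin_count);

	if (modify_sseu(&ce, 4, 2)) {	/* changed: pinned while in flight */
		printf("in flight:    pin=%d\n", ce.pin_count);
		active_retire(&ce);	/* "GPU finished writing" */
	}
	printf("after retire: pin=%d\n", ce.pin_count);
	return 0;
}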

> > +     if (ret)
> > +             goto out_add;
> > +
> > +     ret = gen8_emit_rpcs_config(rq, ce, sseu);
> >  
> >  out_add:
> >       i915_request_add(rq);
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> > index 630392c77e48..9691dd062f72 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> > @@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
> >  
> >  /* i915_gem_context.c */
> >  int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
> > -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
> >  void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
> >  
> >  int i915_gem_context_open(struct drm_i915_private *i915,
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > index f40f13c0b8b7..59b6d45b1936 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> > @@ -10,6 +10,22 @@
> >  #include "i915_drv.h"
> >  #include "i915_globals.h"
> >  
> > +static void call_idle_barriers(struct intel_engine_cs *engine)
> > +{
> > +     struct llist_node *node, *next;
> > +
> > +     llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
> > +             struct i915_active_request *active =
> > +                     container_of((struct list_head *)node,
> > +                                  typeof(*active), link);
> > +
> > +             INIT_LIST_HEAD(&active->link);
> > +             RCU_INIT_POINTER(active->request, NULL);
> > +
> > +             active->retire(active, NULL);
> > +     }
> > +}
> > +
> >  static void i915_gem_park(struct drm_i915_private *i915)
> >  {
> >       struct intel_engine_cs *engine;
> > @@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915)
> >  
> >       lockdep_assert_held(&i915->drm.struct_mutex);
> >  
> > -     for_each_engine(engine, i915, id)
> > +     for_each_engine(engine, i915, id) {
> > +             call_idle_barriers(engine); /* cleanup after wedging */
> >               i915_gem_batch_pool_fini(&engine->batch_pool);
> > +     }
> >  
> >       i915_timelines_park(i915);
> >       i915_vma_parked(i915);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> > index c78ec0b58e77..c10eb4904264 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
> >  
> >               i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
> >  
> > -             intel_context_get(ce);
> >               smp_mb__before_atomic(); /* flush pin before it is visible */
> >       }
> >  
> > @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
> >               ce->ops->unpin(ce);
> >  
> >               i915_gem_context_put(ce->gem_context);
> > -             intel_context_put(ce);
> > +             intel_context_inactive(ce);
> >       }
> >  
> >       mutex_unlock(&ce->pin_mutex);
> >       intel_context_put(ce);
> >  }
> >  
> > -static void intel_context_retire(struct i915_active_request *active,
> > -                              struct i915_request *rq)
> > +static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
> >  {
> > -     struct intel_context *ce =
> > -             container_of(active, typeof(*ce), active_tracker);
> > +     int err;
> >  
> > -     intel_context_unpin(ce);
> > +     err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
> > +     if (err)
> > +             return err;
> > +
> > +     /*
> > +      * And mark it as a globally pinned object to let the shrinker know
> > +      * it cannot reclaim the object until we release it.
> > +      */
> > +     vma->obj->pin_global++;
> > +     vma->obj->mm.dirty = true;
> > +
> > +     return 0;
> > +}
> > +
> > +static void __context_unpin_state(struct i915_vma *vma)
> > +{
> > +     vma->obj->pin_global--;
> > +     __i915_vma_unpin(vma);
> > +}
> > +
> > +static void intel_context_retire(struct i915_active *active)
> > +{
> > +     struct intel_context *ce = container_of(active, typeof(*ce), active);
> > +
> > +     if (ce->state)
> > +             __context_unpin_state(ce->state);
> > +
> > +     intel_context_put(ce);
> >  }
> >  
> >  void
> > @@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,
> >  
> >       mutex_init(&ce->pin_mutex);
> >  
> > -     i915_active_request_init(&ce->active_tracker,
> > -                              NULL, intel_context_retire);
> > +     i915_active_init(ctx->i915, &ce->active, intel_context_retire);
> > +}
> > +
> > +int intel_context_active(struct intel_context *ce, unsigned long flags)
> 
> 
> I can digest this, but I was missing a verb in it and thought of
> intel_context_activate|deactivate.

You will never make me write activ8! Other than inserting mark or make,
I don't have a better idea, and I have grown quite used to it over the
last several months. I think the intent here is reasonably clear: this
is to operate on ce->active.

Maybe, intel_context_active_acquire() and intel_context_active_release()?
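
If that spelling helps, the rename would be purely cosmetic; a sketch of
the paired names as thin wrappers over the functions already in this
patch (nothing new beyond the naming):

static inline int intel_context_active_acquire(struct intel_context *ce,
					       unsigned long flags)
{
	return intel_context_active(ce, flags);
}

static inline void intel_context_active_release(struct intel_context *ce)
{
	/* pairs with the acquire above */
	intel_context_inactive(ce);
}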

> > +{
> > +     int err;
> > +
> > +     if (!i915_active_acquire(&ce->active))
> > +             return 0;
> > +
> > +     intel_context_get(ce);
> > +
> > +     if (!ce->state)
> > +             return 0;
> > +
> > +     err = __context_pin_state(ce->state, flags);
> > +     if (err) {
> > +             i915_active_cancel(&ce->active);
> > +             intel_context_put(ce);
> > +             return err;
> > +     }
> > +
> > +     /* Preallocate tracking nodes */
> > +     if (!i915_gem_context_is_kernel(ce->gem_context)) {
> > +             err = i915_active_acquire_preallocate_barrier(&ce->active,
> > +                                                           ce->engine);
> > +             if (err) {
> > +                     i915_active_release(&ce->active);
> > +                     return err;
> > +             }
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +void intel_context_inactive(struct intel_context *ce)
> > +{
> > +     /* Nodes preallocated in intel_context_active() */
> > +     i915_active_acquire_barrier(&ce->active);
> > +     i915_active_release(&ce->active);
> >  }
> >  
> >  static void i915_global_context_shrink(void)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> > index 6d5453ba2c1e..4de4ba2df7d4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> > @@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
> >               ce->ops->exit(ce);
> >  }
> >  
> > +int intel_context_active(struct intel_context *ce, unsigned long flags);
> > +void intel_context_inactive(struct intel_context *ce);
> > +
> >  static inline struct intel_context *intel_context_get(struct intel_context *ce)
> >  {
> >       kref_get(&ce->ref);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index 825fcf0ac9c4..e95be4be9612 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -56,10 +56,10 @@ struct intel_context {
> >       intel_engine_mask_t saturated; /* submitting semaphores too late? */
> >  
> >       /**
> > -      * active_tracker: Active tracker for the external rq activity
> > -      * on this intel_context object.
> > +      * active: Active tracker for the rq activity (inc. external) on this
> > +      * intel_context object.
> >        */
> > -     struct i915_active_request active_tracker;
> > +     struct i915_active active;
> >  
> >       const struct intel_context_ops *ops;
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> > index 201bbd2a4faf..b9fd88f21609 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > @@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
> >  bool intel_engine_is_idle(struct intel_engine_cs *engine);
> >  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
> >  
> > -void intel_engine_lost_context(struct intel_engine_cs *engine);
> > -
> >  void intel_engines_reset_default_submission(struct drm_i915_private *i915);
> >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index c0d986db5a75..5a08036ae774 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
> >  {
> >       int err;
> >  
> > +     init_llist_head(&engine->barrier_tasks);
> > +
> >       err = init_status_page(engine);
> >       if (err)
> >               return err;
> > @@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> >       if (engine->preempt_context)
> >               intel_context_unpin(engine->preempt_context);
> >       intel_context_unpin(engine->kernel_context);
> > +     GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
> >  
> >       i915_timeline_fini(&engine->timeline);
> >  
> > @@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
> >               engine->set_default_submission(engine);
> >  }
> >  
> > -/**
> > - * intel_engine_lost_context: called when the GPU is reset into unknown state
> > - * @engine: the engine
> > - *
> > - * We have either reset the GPU or otherwise about to lose state tracking of
> > - * the current GPU logical state (e.g. suspend). On next use, it is therefore
> > - * imperative that we make no presumptions about the current state and load
> > - * from scratch.
> > - */
> > -void intel_engine_lost_context(struct intel_engine_cs *engine)
> > -{
> > -     struct intel_context *ce;
> > -
> > -     lockdep_assert_held(&engine->i915->drm.struct_mutex);
> > -
> > -     ce = fetch_and_zero(&engine->last_retired_context);
> > -     if (ce)
> > -             intel_context_unpin(ce);
> > -}
> > -
> >  bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
> >  {
> >       switch (INTEL_GEN(engine->i915)) {
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > index ccf034764741..3c448a061abd 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > @@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
> >  
> >       /* Check again on the next retirement. */
> >       engine->wakeref_serial = engine->serial + 1;
> > +
> > +     i915_request_add_barriers(rq);
> >       __i915_request_commit(rq);
> >  
> >       return false;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 01223864237a..33a31aa2d2ae 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -11,6 +11,7 @@
> >  #include <linux/irq_work.h>
> >  #include <linux/kref.h>
> >  #include <linux/list.h>
> > +#include <linux/llist.h>
> >  #include <linux/types.h>
> >  
> >  #include "i915_gem.h"
> > @@ -288,6 +289,7 @@ struct intel_engine_cs {
> >       struct intel_ring *buffer;
> >  
> >       struct i915_timeline timeline;
> > +     struct llist_head barrier_tasks;
> >  
> >       struct intel_context *kernel_context; /* pinned */
> >       struct intel_context *preempt_context; /* pinned; optional */
> > @@ -435,17 +437,6 @@ struct intel_engine_cs {
> >  
> >       struct intel_engine_execlists execlists;
> >  
> > -     /* Contexts are pinned whilst they are active on the GPU. The last
> > -      * context executed remains active whilst the GPU is idle - the
> > -      * switch away and write to the context object only occurs on the
> > -      * next execution.  Contexts are only unpinned on retirement of the
> > -      * following request ensuring that we can always write to the object
> > -      * on the context switch even after idling. Across suspend, we switch
> > -      * to the kernel context and trash it as the save may not happen
> > -      * before the hardware is powered down.
> > -      */
> > -     struct intel_context *last_retired_context;
> > -
> >       /* status_notifier: list of callbacks for context-switch changes */
> >       struct atomic_notifier_head context_status_notifier;
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index b8f5592da18f..05524489615c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
> >       intel_context_free(ce);
> >  }
> >  
> > -static int __context_pin(struct i915_vma *vma)
> > -{
> > -     unsigned int flags;
> > -     int err;
> > -
> > -     flags = PIN_GLOBAL | PIN_HIGH;
> > -     flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> > -
> > -     err = i915_vma_pin(vma, 0, 0, flags);
> > -     if (err)
> > -             return err;
> > -
> > -     vma->obj->pin_global++;
> > -     vma->obj->mm.dirty = true;
> > -
> > -     return 0;
> > -}
> > -
> > -static void __context_unpin(struct i915_vma *vma)
> > -{
> > -     vma->obj->pin_global--;
> > -     __i915_vma_unpin(vma);
> > -}
> > -
> >  static void execlists_context_unpin(struct intel_context *ce)
> >  {
> > -     struct intel_engine_cs *engine;
> > -
> > -     /*
> > -      * The tasklet may still be using a pointer to our state, via an
> > -      * old request. However, since we know we only unpin the context
> > -      * on retirement of the following request, we know that the last
> > -      * request referencing us will have had a completion CS interrupt.
> > -      * If we see that it is still active, it means that the tasklet hasn't
> > -      * had the chance to run yet; let it run before we teardown the
> > -      * reference it may use.
> > -      */
> > -     engine = READ_ONCE(ce->inflight);
> > -     if (unlikely(engine)) {
> > -             unsigned long flags;
> > -
> > -             spin_lock_irqsave(&engine->timeline.lock, flags);
> > -             process_csb(engine);
> > -             spin_unlock_irqrestore(&engine->timeline.lock, flags);
> > -
> > -             GEM_BUG_ON(READ_ONCE(ce->inflight));
> > -     }
> > -
> >       i915_gem_context_unpin_hw_id(ce->gem_context);
> > -
> > -     intel_ring_unpin(ce->ring);
> > -
> >       i915_gem_object_unpin_map(ce->state->obj);
> > -     __context_unpin(ce->state);
> > +     intel_ring_unpin(ce->ring);
> >  }
> >  
> >  static void
> > @@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce,
> >               goto err;
> >       GEM_BUG_ON(!ce->state);
> >  
> > -     ret = __context_pin(ce->state);
> > +     ret = intel_context_active(ce,
> > +                                engine->i915->ggtt.pin_bias |
> > +                                PIN_OFFSET_BIAS |
> > +                                PIN_HIGH);
> >       if (ret)
> >               goto err;
> >  
> > @@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
> >                                       I915_MAP_OVERRIDE);
> >       if (IS_ERR(vaddr)) {
> >               ret = PTR_ERR(vaddr);
> > -             goto unpin_vma;
> > +             goto unpin_active;
> >       }
> >  
> >       ret = intel_ring_pin(ce->ring);
> > @@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce,
> >       intel_ring_unpin(ce->ring);
> >  unpin_map:
> >       i915_gem_object_unpin_map(ce->state->obj);
> > -unpin_vma:
> > -     __context_unpin(ce->state);
> > +unpin_active:
> > +     intel_context_inactive(ce);
> >  err:
> >       return ret;
> >  }
> > diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > index c834d016c965..7ab28b6f62a1 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > @@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
> >               gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
> >  }
> >  
> > -static int __context_pin(struct intel_context *ce)
> > -{
> > -     struct i915_vma *vma;
> > -     int err;
> > -
> > -     vma = ce->state;
> > -     if (!vma)
> > -             return 0;
> > -
> > -     err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> > -     if (err)
> > -             return err;
> > -
> > -     /*
> > -      * And mark is as a globally pinned object to let the shrinker know
> > -      * it cannot reclaim the object until we release it.
> > -      */
> > -     vma->obj->pin_global++;
> > -     vma->obj->mm.dirty = true;
> > -
> > -     return 0;
> > -}
> > -
> > -static void __context_unpin(struct intel_context *ce)
> > -{
> > -     struct i915_vma *vma;
> > -
> > -     vma = ce->state;
> > -     if (!vma)
> > -             return;
> > -
> > -     vma->obj->pin_global--;
> > -     i915_vma_unpin(vma);
> > -}
> > -
> >  static void ring_context_unpin(struct intel_context *ce)
> >  {
> >       __context_unpin_ppgtt(ce->gem_context);
> > -     __context_unpin(ce);
> >  }
> >  
> >  static struct i915_vma *
> > @@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce)
> >               ce->state = vma;
> >       }
> >  
> > -     err = __context_pin(ce);
> > +     err = intel_context_active(ce, PIN_HIGH);
> >       if (err)
> >               return err;
> >  
> >       err = __context_pin_ppgtt(ce->gem_context);
> >       if (err)
> > -             goto err_unpin;
> > +             goto err_active;
> >  
> >       return 0;
> >  
> > -err_unpin:
> > -     __context_unpin(ce);
> > +err_active:
> > +     intel_context_inactive(ce);
> >       return err;
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> > index 6d7562769eb2..b7675ef18523 100644
> > --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> > +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> > @@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
> >  
> >  static int mock_context_pin(struct intel_context *ce)
> >  {
> > +     int ret;
> > +
> >       if (!ce->ring) {
> >               ce->ring = mock_ring(ce->engine);
> >               if (!ce->ring)
> >                       return -ENOMEM;
> >       }
> >  
> > +     ret = intel_context_active(ce, PIN_HIGH);
> > +     if (ret)
> > +             return ret;
> > +
> >       mock_timeline_pin(ce->ring->timeline);
> >       return 0;
> >  }
> > @@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
> >  {
> >       struct mock_engine *mock =
> >               container_of(engine, typeof(*mock), base);
> > -     struct intel_context *ce;
> >  
> >       GEM_BUG_ON(timer_pending(&mock->hw_delay));
> >  
> > -     ce = fetch_and_zero(&engine->last_retired_context);
> > -     if (ce)
> > -             intel_context_unpin(ce);
> > -
> >       intel_context_unpin(engine->kernel_context);
> >  
> >       intel_engine_fini_breadcrumbs(engine);
> > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> > index 863ae12707ba..100e40afc9d6 100644
> > --- a/drivers/gpu/drm/i915/i915_active.c
> > +++ b/drivers/gpu/drm/i915/i915_active.c
> > @@ -100,7 +100,7 @@ active_instance(struct i915_active *ref, u64 idx)
> >               parent = *p;
> >  
> >               node = rb_entry(parent, struct active_node, node);
> > -             if (node->timeline == idx)
> > +             if (node->timeline == idx && !IS_ERR(node->base.request))
> 
> Is this a related change?

It once was (in the next chunk). I used to insert the freshly preallocated
node into the tree before it had a valid request. It appears that is no
longer the case, and the ERR_PTR is kept safely on a list until it is ready.
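
For reference, a toy standalone model of what that check guards against:
a barrier placeholder must not be reused as a live tracking slot. None of
this is the i915 code; BARRIER merely stands in for the ERR_PTR(-EAGAIN)
marker, and the other names are invented:

#include <stdio.h>

#define BARRIER ((void *)-1)	/* stand-in for ERR_PTR(-EAGAIN) */

struct slot {
	unsigned long timeline;
	void *request;		/* BARRIER means "reserved, not usable" */
};

static struct slot *find_usable(struct slot *slots, int n, unsigned long tl)
{
	for (int i = 0; i < n; i++) {
		if (slots[i].timeline == tl && slots[i].request != BARRIER)
			return &slots[i];	/* reuse the live tracker */
	}
	return NULL;		/* caller allocates a fresh slot instead */
}

int main(void)
{
	struct slot slots[] = {
		{ .timeline = 1, .request = BARRIER },	/* idle barrier */
		{ .timeline = 2, .request = NULL },
	};

	printf("timeline 1 reusable: %s\n",
	       find_usable(slots, 2, 1) ? "yes" : "no");
	printf("timeline 2 reusable: %s\n",
	       find_usable(slots, 2, 2) ? "yes" : "no");
	return 0;
}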

> -Mika
> 
> >                       goto replace;
> >  
> >               if (node->timeline < idx)
> > @@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
> >       ref->retire = retire;
> >       ref->tree = RB_ROOT;
> >       i915_active_request_init(&ref->last, NULL, last_retire);
> > +     init_llist_head(&ref->barriers);
> >       ref->count = 0;
> >  }
> >  
> > @@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref)
> >  }
> >  #endif
> >  
> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> > +                                         struct intel_engine_cs *engine)
> > +{
> > +     struct drm_i915_private *i915 = engine->i915;
> > +     unsigned long tmp;
> > +     int err = 0;
> > +
> > +     GEM_BUG_ON(!engine->mask);
> > +     for_each_engine_masked(engine, i915, engine->mask, tmp) {
> > +             struct intel_context *kctx = engine->kernel_context;
> > +             struct active_node *node;
> > +
> > +             node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
> > +             if (unlikely(!node)) {
> > +                     err = -ENOMEM;
> > +                     break;
> > +             }
> > +
> > +             i915_active_request_init(&node->base,
> > +                                      (void *)engine, node_retire);
> > +             node->timeline = kctx->ring->timeline->fence_context;
> > +             node->ref = ref;
> > +             ref->count++;
> > +
> > +             llist_add((struct llist_node *)&node->base.link,
> > +                       &ref->barriers);
> > +     }
> > +
> > +     return err;
> > +}
> > +
> > +void i915_active_acquire_barrier(struct i915_active *ref)
> > +{
> > +     struct llist_node *pos, *next;
> > +
> > +     i915_active_acquire(ref);
> > +
> > +     llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
> > +             struct intel_engine_cs *engine;
> > +             struct active_node *node;
> > +             struct rb_node **p, *parent;
> > +
> > +             node = container_of((struct list_head *)pos,
> > +                                 typeof(*node), base.link);
> > +
> > +             engine = (void *)rcu_access_pointer(node->base.request);
> > +             RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
> > +
> > +             parent = NULL;
> > +             p = &ref->tree.rb_node;
> > +             while (*p) {
> > +                     parent = *p;
> > +                     if (rb_entry(parent,
> > +                                  struct active_node,
> > +                                  node)->timeline < node->timeline)
> > +                             p = &parent->rb_right;
> > +                     else
> > +                             p = &parent->rb_left;
> > +             }
> > +             rb_link_node(&node->node, parent, p);
> > +             rb_insert_color(&node->node, &ref->tree);
> > +
> > +             llist_add((struct llist_node *)&node->base.link,
> > +                       &engine->barrier_tasks);
> > +     }
> > +     i915_active_release(ref);
> > +}
> > +
> > +void i915_request_add_barriers(struct i915_request *rq)
> > +{
> > +     struct intel_engine_cs *engine = rq->engine;
> > +     struct llist_node *node, *next;
> > +
> > +     llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
> > +             list_add_tail((struct list_head *)node, &rq->active_list);
> > +}
> > +
> >  int i915_active_request_set(struct i915_active_request *active,
> >                           struct i915_request *rq)
> >  {
> > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> > index 7d758719ce39..d55d37673944 100644
> > --- a/drivers/gpu/drm/i915/i915_active.h
> > +++ b/drivers/gpu/drm/i915/i915_active.h
> > @@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
> >  static inline void i915_active_fini(struct i915_active *ref) { }
> >  #endif
> >  
> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> > +                                         struct intel_engine_cs *engine);
> > +void i915_active_acquire_barrier(struct i915_active *ref);
> > +void i915_request_add_barriers(struct i915_request *rq);
> > +
> >  #endif /* _I915_ACTIVE_H_ */
> > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> > index b679253b53a5..c025991b9233 100644
> > --- a/drivers/gpu/drm/i915/i915_active_types.h
> > +++ b/drivers/gpu/drm/i915/i915_active_types.h
> > @@ -7,6 +7,7 @@
> >  #ifndef _I915_ACTIVE_TYPES_H_
> >  #define _I915_ACTIVE_TYPES_H_
> >  
> > +#include <linux/llist.h>
> >  #include <linux/rbtree.h>
> >  #include <linux/rcupdate.h>
> >  
> > @@ -31,6 +32,8 @@ struct i915_active {
> >       unsigned int count;
> >  
> >       void (*retire)(struct i915_active *ref);
> > +
> > +     struct llist_head barriers;
> >  };
> >  
> >  #endif /* _I915_ACTIVE_TYPES_H_ */
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index e980c1ee3dcf..0663f2df65d6 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
> >  
> >       intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
> >       intel_runtime_pm_put(i915, wakeref);
> > -
> > -     mutex_lock(&i915->drm.struct_mutex);
> > -     i915_gem_contexts_lost(i915);
> > -     mutex_unlock(&i915->drm.struct_mutex);
> >  }
> >  
> >  void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index e9b59eea4f10..9eff9de7fa10 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
> >       spin_unlock(&rq->lock);
> >  
> >       local_irq_enable();
> > -
> > -     /*
> > -      * The backing object for the context is done after switching to the
> > -      * *next* context. Therefore we cannot retire the previous context until
> > -      * the next context has already started running. However, since we
> > -      * cannot take the required locks at i915_request_submit() we
> > -      * defer the unpinning of the active context to now, retirement of
> > -      * the subsequent request.
> > -      */
> > -     if (engine->last_retired_context)
> > -             intel_context_unpin(engine->last_retired_context);
> > -     engine->last_retired_context = rq->hw_context;
> >  }
> >  
> >  static void __retire_engine_upto(struct intel_engine_cs *engine,
> > @@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
> >  
> >       rq->infix = rq->ring->emit; /* end of header; start of user payload */
> >  
> > -     /* Keep a second pin for the dual retirement along engine and ring */
> > -     __intel_context_pin(ce);
> > -
> >       intel_context_mark_active(ce);
> >       return rq;
> >  
> > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> > index b7f3fbb4ae89..a96d0c012d46 100644
> > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> > @@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
> >  
> >       mutex_lock(&i915->drm.struct_mutex);
> >       mock_device_flush(i915);
> > -     i915_gem_contexts_lost(i915);
> >       mutex_unlock(&i915->drm.struct_mutex);
> >  
> >       flush_work(&i915->gem.idle_work);
> > -- 
> > 2.20.1
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12 13:42     ` Chris Wilson
@ 2019-06-12 14:09       ` Mika Kuoppala
  2019-06-12 14:17         ` Chris Wilson
  0 siblings, 1 reply; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-12 14:09 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-06-12 14:29:48)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > We need to keep the context image pinned in memory until after the GPU
>> > has finished writing into it. Since it continues to write as we signal
>> > the final breadcrumb, we need to keep it pinned until the request after
>> > it is complete. Currently we know the order in which requests execute on
>> > each engine, and so to remove that presumption we need to identify a
>> > request/context-switch we know must occur after our completion. Any
>> > request queued after the signal must imply a context switch, for
>> > simplicity we use a fresh request from the kernel context.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
>> >  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
>> >  drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 20 ++++-
>> >  drivers/gpu/drm/i915/gt/intel_context.c       | 80 ++++++++++++++++---
>> >  drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
>> >  drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
>> >  drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
>> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
>> >  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
>> >  drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
>> >  drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
>> >  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
>> >  drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
>> >  drivers/gpu/drm/i915/i915_active.c            | 80 ++++++++++++++++++-
>> >  drivers/gpu/drm/i915/i915_active.h            |  5 ++
>> >  drivers/gpu/drm/i915/i915_active_types.h      |  3 +
>> >  drivers/gpu/drm/i915/i915_gem.c               |  4 -
>> >  drivers/gpu/drm/i915/i915_request.c           | 15 ----
>> >  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
>> >  19 files changed, 214 insertions(+), 185 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> > index c86ca9f21532..6200060aef05 100644
>> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> > @@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>> >       return 0;
>> >  }
>> >  
>> > -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
>> > -{
>> > -     struct intel_engine_cs *engine;
>> > -     enum intel_engine_id id;
>> > -
>> > -     lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> > -
>> > -     for_each_engine(engine, dev_priv, id)
>> > -             intel_engine_lost_context(engine);
>> > -}
>> > -
>> >  void i915_gem_contexts_fini(struct drm_i915_private *i915)
>> >  {
>> >       lockdep_assert_held(&i915->drm.struct_mutex);
>> > @@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>> >       if (ret)
>> >               goto out_add;
>> >  
>> > -     ret = gen8_emit_rpcs_config(rq, ce, sseu);
>> > -     if (ret)
>> > -             goto out_add;
>> > -
>> >       /*
>> >        * Guarantee context image and the timeline remains pinned until the
>> >        * modifying request is retired by setting the ce activity tracker.
>> > @@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>> >        * But we only need to take one pin on the account of it. Or in other
>> >        * words transfer the pinned ce object to tracked active request.
>> >        */
>> > -     if (!i915_active_request_isset(&ce->active_tracker))
>> > -             __intel_context_pin(ce);
>> > -     __i915_active_request_set(&ce->active_tracker, rq);
>> > +     GEM_BUG_ON(i915_active_is_idle(&ce->active));
>> > +     ret = i915_active_ref(&ce->active, rq->fence.context, rq);
>> 
>> 
>> Why is this function the place to keep the context alive?
>
> This is a special case where we have one context (the kernel context)
> writing into the context state object of another. To keep the target
> context state pinned, we mark the entire context as active.
>  
>> In other words, if the sseu state is not changed, we bail out early
>> and don't set up the tracker, and thus break the promise of keeping it alive.
>
> As we don't need to keep it alive for an access that never happened.
>
>> > +     if (ret)
>> > +             goto out_add;
>> > +
>> > +     ret = gen8_emit_rpcs_config(rq, ce, sseu);
>> >  
>> >  out_add:
>> >       i915_request_add(rq);
>> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
>> > index 630392c77e48..9691dd062f72 100644
>> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
>> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
>> > @@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
>> >  
>> >  /* i915_gem_context.c */
>> >  int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
>> > -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
>> >  void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
>> >  
>> >  int i915_gem_context_open(struct drm_i915_private *i915,
>> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> > index f40f13c0b8b7..59b6d45b1936 100644
>> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> > @@ -10,6 +10,22 @@
>> >  #include "i915_drv.h"
>> >  #include "i915_globals.h"
>> >  
>> > +static void call_idle_barriers(struct intel_engine_cs *engine)
>> > +{
>> > +     struct llist_node *node, *next;
>> > +
>> > +     llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
>> > +             struct i915_active_request *active =
>> > +                     container_of((struct list_head *)node,
>> > +                                  typeof(*active), link);
>> > +
>> > +             INIT_LIST_HEAD(&active->link);
>> > +             RCU_INIT_POINTER(active->request, NULL);
>> > +
>> > +             active->retire(active, NULL);
>> > +     }
>> > +}
>> > +
>> >  static void i915_gem_park(struct drm_i915_private *i915)
>> >  {
>> >       struct intel_engine_cs *engine;
>> > @@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915)
>> >  
>> >       lockdep_assert_held(&i915->drm.struct_mutex);
>> >  
>> > -     for_each_engine(engine, i915, id)
>> > +     for_each_engine(engine, i915, id) {
>> > +             call_idle_barriers(engine); /* cleanup after wedging */
>> >               i915_gem_batch_pool_fini(&engine->batch_pool);
>> > +     }
>> >  
>> >       i915_timelines_park(i915);
>> >       i915_vma_parked(i915);
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> > index c78ec0b58e77..c10eb4904264 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> > @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
>> >  
>> >               i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
>> >  
>> > -             intel_context_get(ce);
>> >               smp_mb__before_atomic(); /* flush pin before it is visible */
>> >       }
>> >  
>> > @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
>> >               ce->ops->unpin(ce);
>> >  
>> >               i915_gem_context_put(ce->gem_context);
>> > -             intel_context_put(ce);
>> > +             intel_context_inactive(ce);
>> >       }
>> >  
>> >       mutex_unlock(&ce->pin_mutex);
>> >       intel_context_put(ce);
>> >  }
>> >  
>> > -static void intel_context_retire(struct i915_active_request *active,
>> > -                              struct i915_request *rq)
>> > +static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
>> >  {
>> > -     struct intel_context *ce =
>> > -             container_of(active, typeof(*ce), active_tracker);
>> > +     int err;
>> >  
>> > -     intel_context_unpin(ce);
>> > +     err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
>> > +     if (err)
>> > +             return err;
>> > +
>> > +     /*
>> > +      * And mark it as a globally pinned object to let the shrinker know
>> > +      * it cannot reclaim the object until we release it.
>> > +      */
>> > +     vma->obj->pin_global++;
>> > +     vma->obj->mm.dirty = true;
>> > +
>> > +     return 0;
>> > +}
>> > +
>> > +static void __context_unpin_state(struct i915_vma *vma)
>> > +{
>> > +     vma->obj->pin_global--;
>> > +     __i915_vma_unpin(vma);
>> > +}
>> > +
>> > +static void intel_context_retire(struct i915_active *active)
>> > +{
>> > +     struct intel_context *ce = container_of(active, typeof(*ce), active);
>> > +
>> > +     if (ce->state)
>> > +             __context_unpin_state(ce->state);
>> > +
>> > +     intel_context_put(ce);
>> >  }
>> >  
>> >  void
>> > @@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,
>> >  
>> >       mutex_init(&ce->pin_mutex);
>> >  
>> > -     i915_active_request_init(&ce->active_tracker,
>> > -                              NULL, intel_context_retire);
>> > +     i915_active_init(ctx->i915, &ce->active, intel_context_retire);
>> > +}
>> > +
>> > +int intel_context_active(struct intel_context *ce, unsigned long flags)
>> 
>> 
>> I can digest this, but I was missing the verb here and thought of
>> intel_context_activate|deactivate.
>
> You will never make me write activ8! Other than inserting mark or make,
> I don't have a better idea and have grown quite used to it over the last
> several months. I think the intent here is reasonably clear: this is to
> operate on ce->active.
>

mark_active was also on my mind.

> Maybe, intel_context_active_acquire() and intel_context_active_release()?

Given your note that it is ce->active we operate on, not the context,
the current naming starts to fit.

>
>> > +{
>> > +     int err;
>> > +
>> > +     if (!i915_active_acquire(&ce->active))
>> > +             return 0;
>> > +
>> > +     intel_context_get(ce);
>> > +
>> > +     if (!ce->state)
>> > +             return 0;
>> > +
>> > +     err = __context_pin_state(ce->state, flags);
>> > +     if (err) {
>> > +             i915_active_cancel(&ce->active);
>> > +             intel_context_put(ce);
>> > +             return err;
>> > +     }
>> > +
>> > +     /* Preallocate tracking nodes */
>> > +     if (!i915_gem_context_is_kernel(ce->gem_context)) {
>> > +             err = i915_active_acquire_preallocate_barrier(&ce->active,
>> > +                                                           ce->engine);
>> > +             if (err) {
>> > +                     i915_active_release(&ce->active);
>> > +                     return err;
>> > +             }
>> > +     }
>> > +
>> > +     return 0;
>> > +}
>> > +
>> > +void intel_context_inactive(struct intel_context *ce)
>> > +{
>> > +     /* Nodes preallocated in intel_context_active() */
>> > +     i915_active_acquire_barrier(&ce->active);
>> > +     i915_active_release(&ce->active);
>> >  }
>> >  
>> >  static void i915_global_context_shrink(void)
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>> > index 6d5453ba2c1e..4de4ba2df7d4 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_context.h
>> > +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>> > @@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
>> >               ce->ops->exit(ce);
>> >  }
>> >  
>> > +int intel_context_active(struct intel_context *ce, unsigned long flags);
>> > +void intel_context_inactive(struct intel_context *ce);
>> > +
>> >  static inline struct intel_context *intel_context_get(struct intel_context *ce)
>> >  {
>> >       kref_get(&ce->ref);
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> > index 825fcf0ac9c4..e95be4be9612 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> > @@ -56,10 +56,10 @@ struct intel_context {
>> >       intel_engine_mask_t saturated; /* submitting semaphores too late? */
>> >  
>> >       /**
>> > -      * active_tracker: Active tracker for the external rq activity
>> > -      * on this intel_context object.
>> > +      * active: Active tracker for the rq activity (inc. external) on this
>> > +      * intel_context object.
>> >        */
>> > -     struct i915_active_request active_tracker;
>> > +     struct i915_active active;
>> >  
>> >       const struct intel_context_ops *ops;
>> >  
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
>> > index 201bbd2a4faf..b9fd88f21609 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
>> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
>> > @@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
>> >  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>> >  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>> >  
>> > -void intel_engine_lost_context(struct intel_engine_cs *engine);
>> > -
>> >  void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>> >  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
>> >  
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> > index c0d986db5a75..5a08036ae774 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> > @@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>> >  {
>> >       int err;
>> >  
>> > +     init_llist_head(&engine->barrier_tasks);
>> > +
>> >       err = init_status_page(engine);
>> >       if (err)
>> >               return err;
>> > @@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>> >       if (engine->preempt_context)
>> >               intel_context_unpin(engine->preempt_context);
>> >       intel_context_unpin(engine->kernel_context);
>> > +     GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
>> >  
>> >       i915_timeline_fini(&engine->timeline);
>> >  
>> > @@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>> >               engine->set_default_submission(engine);
>> >  }
>> >  
>> > -/**
>> > - * intel_engine_lost_context: called when the GPU is reset into unknown state
>> > - * @engine: the engine
>> > - *
>> > - * We have either reset the GPU or otherwise about to lose state tracking of
>> > - * the current GPU logical state (e.g. suspend). On next use, it is therefore
>> > - * imperative that we make no presumptions about the current state and load
>> > - * from scratch.
>> > - */
>> > -void intel_engine_lost_context(struct intel_engine_cs *engine)
>> > -{
>> > -     struct intel_context *ce;
>> > -
>> > -     lockdep_assert_held(&engine->i915->drm.struct_mutex);
>> > -
>> > -     ce = fetch_and_zero(&engine->last_retired_context);
>> > -     if (ce)
>> > -             intel_context_unpin(ce);
>> > -}
>> > -
>> >  bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
>> >  {
>> >       switch (INTEL_GEN(engine->i915)) {
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>> > index ccf034764741..3c448a061abd 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>> > @@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>> >  
>> >       /* Check again on the next retirement. */
>> >       engine->wakeref_serial = engine->serial + 1;
>> > +
>> > +     i915_request_add_barriers(rq);
>> >       __i915_request_commit(rq);
>> >  
>> >       return false;
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> > index 01223864237a..33a31aa2d2ae 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> > @@ -11,6 +11,7 @@
>> >  #include <linux/irq_work.h>
>> >  #include <linux/kref.h>
>> >  #include <linux/list.h>
>> > +#include <linux/llist.h>
>> >  #include <linux/types.h>
>> >  
>> >  #include "i915_gem.h"
>> > @@ -288,6 +289,7 @@ struct intel_engine_cs {
>> >       struct intel_ring *buffer;
>> >  
>> >       struct i915_timeline timeline;
>> > +     struct llist_head barrier_tasks;
>> >  
>> >       struct intel_context *kernel_context; /* pinned */
>> >       struct intel_context *preempt_context; /* pinned; optional */
>> > @@ -435,17 +437,6 @@ struct intel_engine_cs {
>> >  
>> >       struct intel_engine_execlists execlists;
>> >  
>> > -     /* Contexts are pinned whilst they are active on the GPU. The last
>> > -      * context executed remains active whilst the GPU is idle - the
>> > -      * switch away and write to the context object only occurs on the
>> > -      * next execution.  Contexts are only unpinned on retirement of the
>> > -      * following request ensuring that we can always write to the object
>> > -      * on the context switch even after idling. Across suspend, we switch
>> > -      * to the kernel context and trash it as the save may not happen
>> > -      * before the hardware is powered down.
>> > -      */
>> > -     struct intel_context *last_retired_context;
>> > -
>> >       /* status_notifier: list of callbacks for context-switch changes */
>> >       struct atomic_notifier_head context_status_notifier;
>> >  
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > index b8f5592da18f..05524489615c 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> > @@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
>> >       intel_context_free(ce);
>> >  }
>> >  
>> > -static int __context_pin(struct i915_vma *vma)
>> > -{
>> > -     unsigned int flags;
>> > -     int err;
>> > -
>> > -     flags = PIN_GLOBAL | PIN_HIGH;
>> > -     flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
>> > -
>> > -     err = i915_vma_pin(vma, 0, 0, flags);
>> > -     if (err)
>> > -             return err;
>> > -
>> > -     vma->obj->pin_global++;
>> > -     vma->obj->mm.dirty = true;
>> > -
>> > -     return 0;
>> > -}
>> > -
>> > -static void __context_unpin(struct i915_vma *vma)
>> > -{
>> > -     vma->obj->pin_global--;
>> > -     __i915_vma_unpin(vma);
>> > -}
>> > -
>> >  static void execlists_context_unpin(struct intel_context *ce)
>> >  {
>> > -     struct intel_engine_cs *engine;
>> > -
>> > -     /*
>> > -      * The tasklet may still be using a pointer to our state, via an
>> > -      * old request. However, since we know we only unpin the context
>> > -      * on retirement of the following request, we know that the last
>> > -      * request referencing us will have had a completion CS interrupt.
>> > -      * If we see that it is still active, it means that the tasklet hasn't
>> > -      * had the chance to run yet; let it run before we teardown the
>> > -      * reference it may use.
>> > -      */
>> > -     engine = READ_ONCE(ce->inflight);
>> > -     if (unlikely(engine)) {
>> > -             unsigned long flags;
>> > -
>> > -             spin_lock_irqsave(&engine->timeline.lock, flags);
>> > -             process_csb(engine);
>> > -             spin_unlock_irqrestore(&engine->timeline.lock, flags);
>> > -
>> > -             GEM_BUG_ON(READ_ONCE(ce->inflight));
>> > -     }
>> > -
>> >       i915_gem_context_unpin_hw_id(ce->gem_context);
>> > -
>> > -     intel_ring_unpin(ce->ring);
>> > -
>> >       i915_gem_object_unpin_map(ce->state->obj);
>> > -     __context_unpin(ce->state);
>> > +     intel_ring_unpin(ce->ring);
>> >  }
>> >  
>> >  static void
>> > @@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce,
>> >               goto err;
>> >       GEM_BUG_ON(!ce->state);
>> >  
>> > -     ret = __context_pin(ce->state);
>> > +     ret = intel_context_active(ce,
>> > +                                engine->i915->ggtt.pin_bias |
>> > +                                PIN_OFFSET_BIAS |
>> > +                                PIN_HIGH);
>> >       if (ret)
>> >               goto err;
>> >  
>> > @@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
>> >                                       I915_MAP_OVERRIDE);
>> >       if (IS_ERR(vaddr)) {
>> >               ret = PTR_ERR(vaddr);
>> > -             goto unpin_vma;
>> > +             goto unpin_active;
>> >       }
>> >  
>> >       ret = intel_ring_pin(ce->ring);
>> > @@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce,
>> >       intel_ring_unpin(ce->ring);
>> >  unpin_map:
>> >       i915_gem_object_unpin_map(ce->state->obj);
>> > -unpin_vma:
>> > -     __context_unpin(ce->state);
>> > +unpin_active:
>> > +     intel_context_inactive(ce);
>> >  err:
>> >       return ret;
>> >  }
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> > index c834d016c965..7ab28b6f62a1 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> > @@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
>> >               gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
>> >  }
>> >  
>> > -static int __context_pin(struct intel_context *ce)
>> > -{
>> > -     struct i915_vma *vma;
>> > -     int err;
>> > -
>> > -     vma = ce->state;
>> > -     if (!vma)
>> > -             return 0;
>> > -
>> > -     err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>> > -     if (err)
>> > -             return err;
>> > -
>> > -     /*
>> > -      * And mark is as a globally pinned object to let the shrinker know
>> > -      * it cannot reclaim the object until we release it.
>> > -      */
>> > -     vma->obj->pin_global++;
>> > -     vma->obj->mm.dirty = true;
>> > -
>> > -     return 0;
>> > -}
>> > -
>> > -static void __context_unpin(struct intel_context *ce)
>> > -{
>> > -     struct i915_vma *vma;
>> > -
>> > -     vma = ce->state;
>> > -     if (!vma)
>> > -             return;
>> > -
>> > -     vma->obj->pin_global--;
>> > -     i915_vma_unpin(vma);
>> > -}
>> > -
>> >  static void ring_context_unpin(struct intel_context *ce)
>> >  {
>> >       __context_unpin_ppgtt(ce->gem_context);
>> > -     __context_unpin(ce);
>> >  }
>> >  
>> >  static struct i915_vma *
>> > @@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce)
>> >               ce->state = vma;
>> >       }
>> >  
>> > -     err = __context_pin(ce);
>> > +     err = intel_context_active(ce, PIN_HIGH);
>> >       if (err)
>> >               return err;
>> >  
>> >       err = __context_pin_ppgtt(ce->gem_context);
>> >       if (err)
>> > -             goto err_unpin;
>> > +             goto err_active;
>> >  
>> >       return 0;
>> >  
>> > -err_unpin:
>> > -     __context_unpin(ce);
>> > +err_active:
>> > +     intel_context_inactive(ce);
>> >       return err;
>> >  }
>> >  
>> > diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
>> > index 6d7562769eb2..b7675ef18523 100644
>> > --- a/drivers/gpu/drm/i915/gt/mock_engine.c
>> > +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
>> > @@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
>> >  
>> >  static int mock_context_pin(struct intel_context *ce)
>> >  {
>> > +     int ret;
>> > +
>> >       if (!ce->ring) {
>> >               ce->ring = mock_ring(ce->engine);
>> >               if (!ce->ring)
>> >                       return -ENOMEM;
>> >       }
>> >  
>> > +     ret = intel_context_active(ce, PIN_HIGH);
>> > +     if (ret)
>> > +             return ret;
>> > +
>> >       mock_timeline_pin(ce->ring->timeline);
>> >       return 0;
>> >  }
>> > @@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
>> >  {
>> >       struct mock_engine *mock =
>> >               container_of(engine, typeof(*mock), base);
>> > -     struct intel_context *ce;
>> >  
>> >       GEM_BUG_ON(timer_pending(&mock->hw_delay));
>> >  
>> > -     ce = fetch_and_zero(&engine->last_retired_context);
>> > -     if (ce)
>> > -             intel_context_unpin(ce);
>> > -
>> >       intel_context_unpin(engine->kernel_context);
>> >  
>> >       intel_engine_fini_breadcrumbs(engine);
>> > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
>> > index 863ae12707ba..100e40afc9d6 100644
>> > --- a/drivers/gpu/drm/i915/i915_active.c
>> > +++ b/drivers/gpu/drm/i915/i915_active.c
>> > @@ -100,7 +100,7 @@ active_instance(struct i915_active *ref, u64 idx)
>> >               parent = *p;
>> >  
>> >               node = rb_entry(parent, struct active_node, node);
>> > -             if (node->timeline == idx)
>> > +             if (node->timeline == idx && !IS_ERR(node->base.request))
>> 
>> Is this a related change?
>
> It once was (in the next chunk). I used to insert the freshly preallocated
> node into the tree before it had a valid request. It appears that is no
> longer the case and the ERR_PTR is kept safely in a list until ready.
>
>> -Mika
>> 
>> >                       goto replace;
>> >  
>> >               if (node->timeline < idx)
>> > @@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
>> >       ref->retire = retire;
>> >       ref->tree = RB_ROOT;
>> >       i915_active_request_init(&ref->last, NULL, last_retire);
>> > +     init_llist_head(&ref->barriers);
>> >       ref->count = 0;
>> >  }
>> >  
>> > @@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref)
>> >  }
>> >  #endif
>> >  
>> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>> > +                                         struct intel_engine_cs *engine)
>> > +{
>> > +     struct drm_i915_private *i915 = engine->i915;
>> > +     unsigned long tmp;
>> > +     int err = 0;
>> > +
>> > +     GEM_BUG_ON(!engine->mask);
>> > +     for_each_engine_masked(engine, i915, engine->mask, tmp) {
>> > +             struct intel_context *kctx = engine->kernel_context;
>> > +             struct active_node *node;
>> > +
>> > +             node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
>> > +             if (unlikely(!node)) {
>> > +                     err = -ENOMEM;
>> > +                     break;
>> > +             }
>> > +
>> > +             i915_active_request_init(&node->base,
>> > +                                      (void *)engine, node_retire);

In the commit message you promise that you will queue a request for the
kernel context. But here, you seem to use (abuse!?) the active request to
make a shadow of a request and use it to call the idle barriers.

So either the commit message needs tweaking or I don't have
the slightest idea yet where we are flying here :)

-Mika

>> > +             node->timeline = kctx->ring->timeline->fence_context;
>> > +             node->ref = ref;
>> > +             ref->count++;
>> > +
>> > +             llist_add((struct llist_node *)&node->base.link,
>> > +                       &ref->barriers);
>> > +     }
>> > +
>> > +     return err;
>> > +}
>> > +
>> > +void i915_active_acquire_barrier(struct i915_active *ref)
>> > +{
>> > +     struct llist_node *pos, *next;
>> > +
>> > +     i915_active_acquire(ref);
>> > +
>> > +     llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
>> > +             struct intel_engine_cs *engine;
>> > +             struct active_node *node;
>> > +             struct rb_node **p, *parent;
>> > +
>> > +             node = container_of((struct list_head *)pos,
>> > +                                 typeof(*node), base.link);
>> > +
>> > +             engine = (void *)rcu_access_pointer(node->base.request);
>> > +             RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
>> > +
>> > +             parent = NULL;
>> > +             p = &ref->tree.rb_node;
>> > +             while (*p) {
>> > +                     parent = *p;
>> > +                     if (rb_entry(parent,
>> > +                                  struct active_node,
>> > +                                  node)->timeline < node->timeline)
>> > +                             p = &parent->rb_right;
>> > +                     else
>> > +                             p = &parent->rb_left;
>> > +             }
>> > +             rb_link_node(&node->node, parent, p);
>> > +             rb_insert_color(&node->node, &ref->tree);
>> > +
>> > +             llist_add((struct llist_node *)&node->base.link,
>> > +                       &engine->barrier_tasks);
>> > +     }
>> > +     i915_active_release(ref);
>> > +}
>> > +
>> > +void i915_request_add_barriers(struct i915_request *rq)
>> > +{
>> > +     struct intel_engine_cs *engine = rq->engine;
>> > +     struct llist_node *node, *next;
>> > +
>> > +     llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
>> > +             list_add_tail((struct list_head *)node, &rq->active_list);
>> > +}
>> > +
>> >  int i915_active_request_set(struct i915_active_request *active,
>> >                           struct i915_request *rq)
>> >  {
>> > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
>> > index 7d758719ce39..d55d37673944 100644
>> > --- a/drivers/gpu/drm/i915/i915_active.h
>> > +++ b/drivers/gpu/drm/i915/i915_active.h
>> > @@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
>> >  static inline void i915_active_fini(struct i915_active *ref) { }
>> >  #endif
>> >  
>> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>> > +                                         struct intel_engine_cs *engine);
>> > +void i915_active_acquire_barrier(struct i915_active *ref);
>> > +void i915_request_add_barriers(struct i915_request *rq);
>> > +
>> >  #endif /* _I915_ACTIVE_H_ */
>> > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
>> > index b679253b53a5..c025991b9233 100644
>> > --- a/drivers/gpu/drm/i915/i915_active_types.h
>> > +++ b/drivers/gpu/drm/i915/i915_active_types.h
>> > @@ -7,6 +7,7 @@
>> >  #ifndef _I915_ACTIVE_TYPES_H_
>> >  #define _I915_ACTIVE_TYPES_H_
>> >  
>> > +#include <linux/llist.h>
>> >  #include <linux/rbtree.h>
>> >  #include <linux/rcupdate.h>
>> >  
>> > @@ -31,6 +32,8 @@ struct i915_active {
>> >       unsigned int count;
>> >  
>> >       void (*retire)(struct i915_active *ref);
>> > +
>> > +     struct llist_head barriers;
>> >  };
>> >  
>> >  #endif /* _I915_ACTIVE_TYPES_H_ */
>> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> > index e980c1ee3dcf..0663f2df65d6 100644
>> > --- a/drivers/gpu/drm/i915/i915_gem.c
>> > +++ b/drivers/gpu/drm/i915/i915_gem.c
>> > @@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>> >  
>> >       intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>> >       intel_runtime_pm_put(i915, wakeref);
>> > -
>> > -     mutex_lock(&i915->drm.struct_mutex);
>> > -     i915_gem_contexts_lost(i915);
>> > -     mutex_unlock(&i915->drm.struct_mutex);
>> >  }
>> >  
>> >  void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
>> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> > index e9b59eea4f10..9eff9de7fa10 100644
>> > --- a/drivers/gpu/drm/i915/i915_request.c
>> > +++ b/drivers/gpu/drm/i915/i915_request.c
>> > @@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
>> >       spin_unlock(&rq->lock);
>> >  
>> >       local_irq_enable();
>> > -
>> > -     /*
>> > -      * The backing object for the context is done after switching to the
>> > -      * *next* context. Therefore we cannot retire the previous context until
>> > -      * the next context has already started running. However, since we
>> > -      * cannot take the required locks at i915_request_submit() we
>> > -      * defer the unpinning of the active context to now, retirement of
>> > -      * the subsequent request.
>> > -      */
>> > -     if (engine->last_retired_context)
>> > -             intel_context_unpin(engine->last_retired_context);
>> > -     engine->last_retired_context = rq->hw_context;
>> >  }
>> >  
>> >  static void __retire_engine_upto(struct intel_engine_cs *engine,
>> > @@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>> >  
>> >       rq->infix = rq->ring->emit; /* end of header; start of user payload */
>> >  
>> > -     /* Keep a second pin for the dual retirement along engine and ring */
>> > -     __intel_context_pin(ce);
>> > -
>> >       intel_context_mark_active(ce);
>> >       return rq;
>> >  
>> > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
>> > index b7f3fbb4ae89..a96d0c012d46 100644
>> > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
>> > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
>> > @@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
>> >  
>> >       mutex_lock(&i915->drm.struct_mutex);
>> >       mock_device_flush(i915);
>> > -     i915_gem_contexts_lost(i915);
>> >       mutex_unlock(&i915->drm.struct_mutex);
>> >  
>> >       flush_work(&i915->gem.idle_work);
>> > -- 
>> > 2.20.1
>> >
>> > _______________________________________________
>> > Intel-gfx mailing list
>> > Intel-gfx@lists.freedesktop.org
>> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12 14:09       ` Mika Kuoppala
@ 2019-06-12 14:17         ` Chris Wilson
  0 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-12 14:17 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-06-12 15:09:49)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> >> > +                                         struct intel_engine_cs *engine)
> >> > +{
> >> > +     struct drm_i915_private *i915 = engine->i915;
> >> > +     unsigned long tmp;
> >> > +     int err = 0;
> >> > +
> >> > +     GEM_BUG_ON(!engine->mask);
> >> > +     for_each_engine_masked(engine, i915, engine->mask, tmp) {
> >> > +             struct intel_context *kctx = engine->kernel_context;
> >> > +             struct active_node *node;
> >> > +
> >> > +             node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
> >> > +             if (unlikely(!node)) {
> >> > +                     err = -ENOMEM;
> >> > +                     break;
> >> > +             }
> >> > +
> >> > +             i915_active_request_init(&node->base,
> >> > +                                      (void *)engine, node_retire);
> 
> In the commit message you promise that you will queue a request for the
> kernel context. But here, you seem to use (abuse!?) the active request to
> make a shadow of a request and use it to call the idle barriers.

So this is preallocation: we have to reserve some memory when pinning
the context because we cannot allocate during unpinning (which can
happen inside the shrinker).

Then on context unpin, we attach this preallocated node onto a list on
the engine, all while keeping the context marked as still active.

Later on, when the engine is marked as idle, we flush the barriers with a
context switch to the kernel context; see switch_to_kernel_context() in
intel_engine_pm.c and i915_request_add_barriers().

The intention is that we also call i915_request_add_barriers() when
emitting the heartbeat requests (once we eliminate the struct_mutex
interposition) so that there is a regular idling pulse and contexts are
not pinned for an eternity.
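
As a rough sketch of that lifecycle (using the helpers this patch adds,
heavily simplified and not the actual call sites):

	/* context pin: may allocate, so the nodes are reserved up front */
	err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine);

	/*
	 * context unpin: may run inside the shrinker, so no allocations;
	 * just move the preallocated nodes onto engine->barrier_tasks and
	 * keep ce->active non-idle until they are consumed.
	 */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);

	/*
	 * engine parking: the switch-to-kernel-context request adopts the
	 * parked nodes, so retiring it finally runs the idle barriers.
	 */
	i915_request_add_barriers(rq);
	__i915_request_commit(rq);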
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v2] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 ` [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
  2019-06-12 13:29   ` Mika Kuoppala
@ 2019-06-12 14:26   ` Chris Wilson
  2019-06-14  9:22     ` Mika Kuoppala
  1 sibling, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2019-06-12 14:26 UTC (permalink / raw)
  To: intel-gfx

We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the request after
it is complete. Currently we know the order in which requests execute on
each engine, and so to remove that presumption we need to identify a
request/context-switch we know must occur after our completion. Any
request queued after the signal must imply a context switch, for
simplicity we use a fresh request from the kernel context.

The sequence of operations for keeping the context pinned until saved is:

 - On context activation, we preallocate a node for each physical engine
   the context may operate on. This is to avoid allocations during
   unpinning, which may be from inside FS_RECLAIM context (aka the
   shrinker)

 - On context deactivation on retirement of the last active request (which
   is before we know the context has been saved), we add the
   preallocated node onto a barrier list on each engine

 - On engine idling, we emit a switch to kernel context. When this
   switch completes, we know that all previous contexts must have been
   saved, and so on retiring this request we can finally unpin all the
   contexts that were marked as deactivated prior to the switch.

We can enhance this in future by flushing all the idle contexts on a
regular heartbeat pulse of a switch to kernel context, which will also
be used to check for hung engines.

v2: intel_context_active_acquire/_release

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 20 ++++-
 drivers/gpu/drm/i915/gt/intel_context.c       | 80 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
 drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
 drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
 drivers/gpu/drm/i915/i915_active.c            | 78 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_active.h            |  5 ++
 drivers/gpu/drm/i915/i915_active_types.h      |  3 +
 drivers/gpu/drm/i915/i915_gem.c               |  4 -
 drivers/gpu/drm/i915/i915_request.c           | 15 ----
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
 19 files changed, 213 insertions(+), 184 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index c86ca9f21532..6200060aef05 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id)
-		intel_engine_lost_context(engine);
-}
-
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	if (ret)
 		goto out_add;
 
-	ret = gen8_emit_rpcs_config(rq, ce, sseu);
-	if (ret)
-		goto out_add;
-
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
 	 * modifying request is retired by setting the ce activity tracker.
@@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * But we only need to take one pin on the account of it. Or in other
 	 * words transfer the pinned ce object to tracked active request.
 	 */
-	if (!i915_active_request_isset(&ce->active_tracker))
-		__intel_context_pin(ce);
-	__i915_active_request_set(&ce->active_tracker, rq);
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (ret)
+		goto out_add;
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 out_add:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 630392c77e48..9691dd062f72 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 6e75702c5671..141f3ea349a4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -10,6 +10,22 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct i915_active_request *active =
+			container_of((struct list_head *)node,
+				     typeof(*active), link);
+
+		INIT_LIST_HEAD(&active->link);
+		RCU_INIT_POINTER(active->request, NULL);
+
+		active->retire(active, NULL);
+	}
+}
+
 static void i915_gem_park(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
+	for_each_engine(engine, i915, id) {
+		call_idle_barriers(engine); /* cleanup after wedging */
 		i915_gem_batch_pool_fini(&engine->batch_pool);
+	}
 
 	i915_timelines_park(i915);
 	i915_vma_parked(i915);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index c78ec0b58e77..8e299c631575 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 
 		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
 
-		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
 
@@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
 		ce->ops->unpin(ce);
 
 		i915_gem_context_put(ce->gem_context);
-		intel_context_put(ce);
+		intel_context_active_release(ce);
 	}
 
 	mutex_unlock(&ce->pin_mutex);
 	intel_context_put(ce);
 }
 
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
+static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
 {
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
+	int err;
 
-	intel_context_unpin(ce);
+	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	if (err)
+		return err;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	vma->obj->pin_global++;
+	vma->obj->mm.dirty = true;
+
+	return 0;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+	vma->obj->pin_global--;
+	__i915_vma_unpin(vma);
+}
+
+static void intel_context_retire(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_context_put(ce);
 }
 
 void
@@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
+	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+}
+
+int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
+{
+	int err;
+
+	if (!i915_active_acquire(&ce->active))
+		return 0;
+
+	intel_context_get(ce);
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state, flags);
+	if (err) {
+		i915_active_cancel(&ce->active);
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Preallocate tracking nodes */
+	if (!i915_gem_context_is_kernel(ce->gem_context)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
 }
 
 static void i915_global_context_shrink(void)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 6d5453ba2c1e..a47275bc4f01 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
 		ce->ops->exit(ce);
 }
 
+int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
+void intel_context_active_release(struct intel_context *ce);
+
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 825fcf0ac9c4..e95be4be9612 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -56,10 +56,10 @@ struct intel_context {
 	intel_engine_mask_t saturated; /* submitting semaphores too late? */
 
 	/**
-	 * active_tracker: Active tracker for the external rq activity
-	 * on this intel_context object.
+	 * active: Active tracker for the rq activity (inc. external) on this
+	 * intel_context object.
 	 */
-	struct i915_active_request active_tracker;
+	struct i915_active active;
 
 	const struct intel_context_ops *ops;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 201bbd2a4faf..b9fd88f21609 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c0d986db5a75..5a08036ae774 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
+	init_llist_head(&engine->barrier_tasks);
+
 	err = init_status_page(engine);
 	if (err)
 		return err;
@@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->preempt_context)
 		intel_context_unpin(engine->preempt_context);
 	intel_context_unpin(engine->kernel_context);
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
 	i915_timeline_fini(&engine->timeline);
 
@@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-}
-
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ccf034764741..3c448a061abd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
+
+	i915_request_add_barriers(rq);
 	__i915_request_commit(rq);
 
 	return false;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 01223864237a..33a31aa2d2ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -11,6 +11,7 @@
 #include <linux/irq_work.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/types.h>
 
 #include "i915_gem.h"
@@ -288,6 +289,7 @@ struct intel_engine_cs {
 	struct intel_ring *buffer;
 
 	struct i915_timeline timeline;
+	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
 	struct intel_context *preempt_context; /* pinned; optional */
@@ -435,17 +437,6 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
-	/* Contexts are pinned whilst they are active on the GPU. The last
-	 * context executed remains active whilst the GPU is idle - the
-	 * switch away and write to the context object only occurs on the
-	 * next execution.  Contexts are only unpinned on retirement of the
-	 * following request ensuring that we can always write to the object
-	 * on the context switch even after idling. Across suspend, we switch
-	 * to the kernel context and trash it as the save may not happen
-	 * before the hardware is powered down.
-	 */
-	struct intel_context *last_retired_context;
-
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b8f5592da18f..d0a51752386f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
 	intel_context_free(ce);
 }
 
-static int __context_pin(struct i915_vma *vma)
-{
-	unsigned int flags;
-	int err;
-
-	flags = PIN_GLOBAL | PIN_HIGH;
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	err = i915_vma_pin(vma, 0, 0, flags);
-	if (err)
-		return err;
-
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct i915_vma *vma)
-{
-	vma->obj->pin_global--;
-	__i915_vma_unpin(vma);
-}
-
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	struct intel_engine_cs *engine;
-
-	/*
-	 * The tasklet may still be using a pointer to our state, via an
-	 * old request. However, since we know we only unpin the context
-	 * on retirement of the following request, we know that the last
-	 * request referencing us will have had a completion CS interrupt.
-	 * If we see that it is still active, it means that the tasklet hasn't
-	 * had the chance to run yet; let it run before we teardown the
-	 * reference it may use.
-	 */
-	engine = READ_ONCE(ce->inflight);
-	if (unlikely(engine)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		process_csb(engine);
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		GEM_BUG_ON(READ_ONCE(ce->inflight));
-	}
-
 	i915_gem_context_unpin_hw_id(ce->gem_context);
-
-	intel_ring_unpin(ce->ring);
-
 	i915_gem_object_unpin_map(ce->state->obj);
-	__context_unpin(ce->state);
+	intel_ring_unpin(ce->ring);
 }
 
 static void
@@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce,
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = __context_pin(ce->state);
+	ret = intel_context_active_acquire(ce,
+					   engine->i915->ggtt.pin_bias |
+					   PIN_OFFSET_BIAS |
+					   PIN_HIGH);
 	if (ret)
 		goto err;
 
@@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
 					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto unpin_vma;
+		goto unpin_active;
 	}
 
 	ret = intel_ring_pin(ce->ring);
@@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce,
 	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
-	__context_unpin(ce->state);
+unpin_active:
+	intel_context_active_release(ce);
 err:
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index c834d016c965..7497c9ce668e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
 		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
 }
 
-static int __context_pin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-	int err;
-
-	vma = ce->state;
-	if (!vma)
-		return 0;
-
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (err)
-		return err;
-
-	/*
-	 * And mark is as a globally pinned object to let the shrinker know
-	 * it cannot reclaim the object until we release it.
-	 */
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-
-	vma = ce->state;
-	if (!vma)
-		return;
-
-	vma->obj->pin_global--;
-	i915_vma_unpin(vma);
-}
-
 static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
-	__context_unpin(ce);
 }
 
 static struct i915_vma *
@@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce)
 		ce->state = vma;
 	}
 
-	err = __context_pin(ce);
+	err = intel_context_active_acquire(ce, PIN_HIGH);
 	if (err)
 		return err;
 
 	err = __context_pin_ppgtt(ce->gem_context);
 	if (err)
-		goto err_unpin;
+		goto err_active;
 
 	return 0;
 
-err_unpin:
-	__context_unpin(ce);
+err_active:
+	intel_context_active_release(ce);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 6d7562769eb2..d1ef515bac8d 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
 
 static int mock_context_pin(struct intel_context *ce)
 {
+	int ret;
+
 	if (!ce->ring) {
 		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
 			return -ENOMEM;
 	}
 
+	ret = intel_context_active_acquire(ce, PIN_HIGH);
+	if (ret)
+		return ret;
+
 	mock_timeline_pin(ce->ring->timeline);
 	return 0;
 }
@@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 863ae12707ba..2d019ac6db20 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
 	i915_active_request_init(&ref->last, NULL, last_retire);
+	init_llist_head(&ref->barriers);
 	ref->count = 0;
 }
 
@@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	unsigned long tmp;
+	int err = 0;
+
+	GEM_BUG_ON(!engine->mask);
+	for_each_engine_masked(engine, i915, engine->mask, tmp) {
+		struct intel_context *kctx = engine->kernel_context;
+		struct active_node *node;
+
+		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+		if (unlikely(!node)) {
+			err = -ENOMEM;
+			break;
+		}
+
+		i915_active_request_init(&node->base,
+					 (void *)engine, node_retire);
+		node->timeline = kctx->ring->timeline->fence_context;
+		node->ref = ref;
+		ref->count++;
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &ref->barriers);
+	}
+
+	return err;
+}
+
+void i915_active_acquire_barrier(struct i915_active *ref)
+{
+	struct llist_node *pos, *next;
+
+	i915_active_acquire(ref);
+
+	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
+		struct intel_engine_cs *engine;
+		struct active_node *node;
+		struct rb_node **p, *parent;
+
+		node = container_of((struct list_head *)pos,
+				    typeof(*node), base.link);
+
+		engine = (void *)rcu_access_pointer(node->base.request);
+		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+
+		parent = NULL;
+		p = &ref->tree.rb_node;
+		while (*p) {
+			parent = *p;
+			if (rb_entry(parent,
+				     struct active_node,
+				     node)->timeline < node->timeline)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&node->node, parent, p);
+		rb_insert_color(&node->node, &ref->tree);
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &engine->barrier_tasks);
+	}
+	i915_active_release(ref);
+}
+
+void i915_request_add_barriers(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+		list_add_tail((struct list_head *)node, &rq->active_list);
+}
+
 int i915_active_request_set(struct i915_active_request *active,
 			    struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7d758719ce39..d55d37673944 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
 static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine);
+void i915_active_acquire_barrier(struct i915_active *ref);
+void i915_request_add_barriers(struct i915_request *rq);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index b679253b53a5..c025991b9233 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,6 +7,7 @@
 #ifndef _I915_ACTIVE_TYPES_H_
 #define _I915_ACTIVE_TYPES_H_
 
+#include <linux/llist.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 
@@ -31,6 +32,8 @@ struct i915_active {
 	unsigned int count;
 
 	void (*retire)(struct i915_active *ref);
+
+	struct llist_head barriers;
 };
 
 #endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d9282b673de..c0f5a00b659a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_contexts_lost(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1cbc3ef4fc27..c2802bbb0cf6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	spin_unlock(&rq->lock);
 
 	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	intel_context_mark_active(ce);
 	return rq;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 1e9ffced78c1..35c92d1db198 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	flush_work(&i915->gem.idle_work);
-- 
2.20.1


* ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (10 preceding siblings ...)
  2019-06-12 10:16 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-06-12 15:29 ` Patchwork
  2019-06-12 15:33 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-12 15:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
URL   : https://patchwork.freedesktop.org/series/61946/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
8566c15aaa58 drm/i915: Keep contexts pinned until after the next kernel context switch
11e02dde299e drm/i915: Stop retiring along engine
a458b25070a5 drm/i915: Replace engine->timeline with a plain list
-:180: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#180: FILE: drivers/gpu/drm/i915/gt/intel_engine_types.h:292:
+		spinlock_t lock;

total: 0 errors, 0 warnings, 1 checks, 968 lines checked
d40bab648ec8 drm/i915: Flush the execution-callbacks on retiring
aff0830f921c drm/i915/execlists: Preempt-to-busy
-:1494: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p_ptr' - possible side-effects?
#1494: FILE: drivers/gpu/drm/i915/i915_utils.h:134:
+#define ptr_count_dec(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(--__v);					\
+} while (0)

-:1500: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'p_ptr' - possible side-effects?
#1500: FILE: drivers/gpu/drm/i915/i915_utils.h:140:
+#define ptr_count_inc(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(++__v);					\
+} while (0)

-:1783: WARNING:LINE_SPACING: Missing a blank line after declarations
#1783: FILE: drivers/gpu/drm/i915/intel_guc_submission.c:820:
+		int rem = ARRAY_SIZE(execlists->inflight) - idx;
+		memmove(execlists->inflight, port, rem * sizeof(*port));

total: 0 errors, 1 warnings, 2 checks, 1682 lines checked
cd324ac73cb8 drm/i915/execlists: Minimalistic timeslicing
-:345: WARNING:LONG_LINE: line over 100 characters
#345: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:211:
+			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {

total: 0 errors, 1 warnings, 0 checks, 426 lines checked
3028af14aae1 drm/i915/execlists: Force preemption


* ✗ Fi.CI.SPARSE: warning for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (11 preceding siblings ...)
  2019-06-12 15:29 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2) Patchwork
@ 2019-06-12 15:33 ` Patchwork
  2019-06-12 16:00 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-12 15:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
URL   : https://patchwork.freedesktop.org/series/61946/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Keep contexts pinned until after the next kernel context switch
Okay!

Commit: drm/i915: Stop retiring along engine
Okay!

Commit: drm/i915: Replace engine->timeline with a plain list
Okay!

Commit: drm/i915: Flush the execution-callbacks on retiring
Okay!

Commit: drm/i915/execlists: Preempt-to-busy
-drivers/gpu/drm/i915/selftests/../i915_utils.h:220:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_utils.h:232:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Minimalistic timeslicing
+drivers/gpu/drm/i915/gt/intel_lrc.c:876:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gt/intel_lrc.c:876:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Force preemption
+
+drivers/gpu/drm/i915/i915_utils.h:232:16: warning: expression using sizeof(void)
+Error in reading or end of file.


* ✓ Fi.CI.BAT: success for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (12 preceding siblings ...)
  2019-06-12 15:33 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-06-12 16:00 ` Patchwork
  2019-06-13  6:16 ` ✗ Fi.CI.IGT: failure for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Patchwork
  2019-06-14  9:58 ` ✗ Fi.CI.IGT: failure for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2) Patchwork
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-12 16:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
URL   : https://patchwork.freedesktop.org/series/61946/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6251 -> Patchwork_13254
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/

Known issues
------------

  Here are the changes found in Patchwork_13254 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@vgem_basic@dmabuf-fence:
    - fi-icl-u3:          [PASS][1] -> [DMESG-WARN][2] ([fdo#107724]) +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/fi-icl-u3/igt@vgem_basic@dmabuf-fence.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/fi-icl-u3/igt@vgem_basic@dmabuf-fence.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-blb-e6850:       [INCOMPLETE][3] ([fdo#107718]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/fi-blb-e6850/igt@gem_exec_suspend@basic-s4-devices.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/fi-blb-e6850/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@gem_flink_basic@double-flink:
    - fi-icl-u3:          [DMESG-WARN][5] ([fdo#107724]) -> [PASS][6] +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/fi-icl-u3/igt@gem_flink_basic@double-flink.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/fi-icl-u3/igt@gem_flink_basic@double-flink.html

  
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724


Participating hosts (51 -> 47)
------------------------------

  Additional (2): fi-icl-dsi fi-skl-iommu 
  Missing    (6): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper 


Build changes
-------------

  * Linux: CI_DRM_6251 -> Patchwork_13254

  CI_DRM_6251: 796f31ae8178af1598db09c4640873504a36b395 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5053: b90ebd9c21518f305a61ee50aea38462ef01e65c @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13254: 3028af14aae113fb2d3aacbaf81e841584ef3734 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

3028af14aae1 drm/i915/execlists: Force preemption
cd324ac73cb8 drm/i915/execlists: Minimalistic timeslicing
aff0830f921c drm/i915/execlists: Preempt-to-busy
d40bab648ec8 drm/i915: Flush the execution-callbacks on retiring
a458b25070a5 drm/i915: Replace engine->timeline with a plain list
11e02dde299e drm/i915: Stop retiring along engine
8566c15aaa58 drm/i915: Keep contexts pinned until after the next kernel context switch

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/

* ✗ Fi.CI.IGT: failure for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (13 preceding siblings ...)
  2019-06-12 16:00 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-06-13  6:16 ` Patchwork
  2019-06-14  9:58 ` ✗ Fi.CI.IGT: failure for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2) Patchwork
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-13  6:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch
URL   : https://patchwork.freedesktop.org/series/61946/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6244_full -> Patchwork_13250_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_13250_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_13250_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_13250_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_engines@execute-one:
    - shard-skl:          [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl5/igt@gem_ctx_engines@execute-one.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl1/igt@gem_ctx_engines@execute-one.html
    - shard-apl:          [PASS][3] -> [FAIL][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-apl2/igt@gem_ctx_engines@execute-one.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-apl6/igt@gem_ctx_engines@execute-one.html
    - shard-glk:          [PASS][5] -> [FAIL][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-glk6/igt@gem_ctx_engines@execute-one.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-glk6/igt@gem_ctx_engines@execute-one.html

  * igt@gem_exec_await@wide-contexts:
    - shard-kbl:          [PASS][7] -> [FAIL][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl7/igt@gem_exec_await@wide-contexts.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl4/igt@gem_exec_await@wide-contexts.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-snb:          [PASS][9] -> [DMESG-WARN][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-snb1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-snb4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  

### Piglit changes ###

#### Possible regressions ####

  * spec@arb_shader_image_load_store@shader-mem-barrier (NEW):
    - pig-glk-j5005:      NOTRUN -> [FAIL][11] +3 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/pig-glk-j5005/spec@arb_shader_image_load_store@shader-mem-barrier.html
    - pig-skl-6260u:      NOTRUN -> [FAIL][12]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/pig-skl-6260u/spec@arb_shader_image_load_store@shader-mem-barrier.html

  
New tests
---------

  New tests have been introduced between CI_DRM_6244_full and Patchwork_13250_full:

### New Piglit tests (4) ###

  * spec@arb_shader_image_load_store@shader-mem-barrier:
    - Statuses : 2 fail(s)
    - Exec time: [0.16, 0.19] s

  * spec@ext_transform_feedback@order arrays points:
    - Statuses : 1 fail(s)
    - Exec time: [0.17] s

  * spec@ext_transform_feedback@order elements triangles:
    - Statuses : 1 fail(s)
    - Exec time: [0.14] s

  * spec@glsl-1.30@execution@fs-execution-ordering:
    - Statuses : 1 fail(s)
    - Exec time: [0.67] s

  

Known issues
------------

  Here are the changes found in Patchwork_13250_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_softpin@noreloc-s3:
    - shard-skl:          [PASS][13] -> [INCOMPLETE][14] ([fdo#104108])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl5/igt@gem_softpin@noreloc-s3.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl9/igt@gem_softpin@noreloc-s3.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-apl:          [PASS][15] -> [INCOMPLETE][16] ([fdo#103927] / [fdo#108686])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-apl6/igt@gem_tiled_swapping@non-threaded.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-apl4/igt@gem_tiled_swapping@non-threaded.html
    - shard-kbl:          [PASS][17] -> [DMESG-WARN][18] ([fdo#108686])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl3/igt@gem_tiled_swapping@non-threaded.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl6/igt@gem_tiled_swapping@non-threaded.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-apl:          [PASS][19] -> [DMESG-WARN][20] ([fdo#108566]) +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-apl3/igt@gem_workarounds@suspend-resume-context.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-apl6/igt@gem_workarounds@suspend-resume-context.html

  * igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw:
    - shard-iclb:         [PASS][21] -> [FAIL][22] ([fdo#103167]) +6 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb6/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-1p-pri-indfb-multidraw.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-pwrite:
    - shard-hsw:          [PASS][23] -> [SKIP][24] ([fdo#109271]) +10 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-hsw2/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-pwrite.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-hsw1/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-kbl:          [PASS][25] -> [DMESG-WARN][26] ([fdo#108566]) +2 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl1/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl3/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_plane@plane-position-covered-pipe-c-planes:
    - shard-skl:          [PASS][27] -> [FAIL][28] ([fdo#110038])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl9/igt@kms_plane@plane-position-covered-pipe-c-planes.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl7/igt@kms_plane@plane-position-covered-pipe-c-planes.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
    - shard-skl:          [PASS][29] -> [FAIL][30] ([fdo#108145])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl7/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl10/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][31] -> [SKIP][32] ([fdo#109441]) +3 similar issues
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb7/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_rotation_crc@multiplane-rotation-cropping-bottom:
    - shard-kbl:          [PASS][33] -> [DMESG-FAIL][34] ([fdo#105763])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl2/igt@kms_rotation_crc@multiplane-rotation-cropping-bottom.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl3/igt@kms_rotation_crc@multiplane-rotation-cropping-bottom.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][35] -> [FAIL][36] ([fdo#99912])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-apl1/igt@kms_setmode@basic.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-apl2/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-iclb:         [PASS][37] -> [INCOMPLETE][38] ([fdo#107713])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb8/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  * igt@syncobj_wait@multi-wait-for-submit-signaled:
    - shard-glk:          [PASS][39] -> [INCOMPLETE][40] ([fdo#103359] / [k.org#198133])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-glk7/igt@syncobj_wait@multi-wait-for-submit-signaled.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-glk2/igt@syncobj_wait@multi-wait-for-submit-signaled.html

  
#### Possible fixes ####

  * igt@gem_exec_balancer@smoke:
    - shard-iclb:         [SKIP][41] ([fdo#110854]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb5/igt@gem_exec_balancer@smoke.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb1/igt@gem_exec_balancer@smoke.html

  * igt@gem_exec_schedule@semaphore-resolve:
    - shard-skl:          [FAIL][43] ([fdo#110519]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl9/igt@gem_exec_schedule@semaphore-resolve.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl8/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-kbl:          [FAIL][45] ([fdo#110519]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl6/igt@gem_exec_schedule@semaphore-resolve.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl2/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-glk:          [FAIL][47] ([fdo#110519]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-glk5/igt@gem_exec_schedule@semaphore-resolve.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-glk8/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-iclb:         [FAIL][49] ([fdo#110519]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb5/igt@gem_exec_schedule@semaphore-resolve.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb2/igt@gem_exec_schedule@semaphore-resolve.html

  * igt@gem_exec_suspend@basic-s3:
    - shard-kbl:          [DMESG-WARN][51] ([fdo#108566]) -> [PASS][52] +2 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl3/igt@gem_exec_suspend@basic-s3.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl6/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-hsw:          [FAIL][53] ([fdo#108686]) -> [PASS][54]
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-hsw8/igt@gem_tiled_swapping@non-threaded.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-hsw2/igt@gem_tiled_swapping@non-threaded.html

  * igt@i915_suspend@sysfs-reader:
    - shard-apl:          [DMESG-WARN][55] ([fdo#108566]) -> [PASS][56] +2 similar issues
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-apl8/igt@i915_suspend@sysfs-reader.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-apl1/igt@i915_suspend@sysfs-reader.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-skl:          [FAIL][57] ([fdo#103232]) -> [PASS][58] +1 similar issue
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl3/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl5/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy:
    - shard-hsw:          [FAIL][59] ([fdo#105767]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-hsw6/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-hsw1/igt@kms_cursor_legacy@2x-long-cursor-vs-flip-legacy.html

  * igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions:
    - shard-hsw:          [FAIL][61] ([fdo#103355]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-hsw5/igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-hsw1/igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions.html

  * igt@kms_flip@2x-flip-vs-expired-vblank:
    - shard-glk:          [FAIL][63] ([fdo#105363]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-glk1/igt@kms_flip@2x-flip-vs-expired-vblank.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-glk9/igt@kms_flip@2x-flip-vs-expired-vblank.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-kbl:          [INCOMPLETE][65] ([fdo#103665]) -> [PASS][66]
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-kbl2/igt@kms_flip@flip-vs-suspend-interruptible.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-kbl7/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render:
    - shard-iclb:         [FAIL][67] ([fdo#103167]) -> [PASS][68] +2 similar issues
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb1/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-cur-indfb-draw-render.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [FAIL][69] ([fdo#108145]) -> [PASS][70]
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl3/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [FAIL][71] ([fdo#103166]) -> [PASS][72]
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb1/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [SKIP][73] ([fdo#109441]) -> [PASS][74] +3 similar issues
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-iclb7/igt@kms_psr@psr2_cursor_render.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-iclb2/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_setmode@basic:
    - shard-skl:          [FAIL][75] ([fdo#99912]) -> [PASS][76]
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6244/shard-skl8/igt@kms_setmode@basic.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/shard-skl4/igt@kms_setmode@basic.html

  
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103355]: https://bugs.freedesktop.org/show_bug.cgi?id=103355
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#105767]: https://bugs.freedesktop.org/show_bug.cgi?id=105767
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#110038]: https://bugs.freedesktop.org/show_bug.cgi?id=110038
  [fdo#110519]: https://bugs.freedesktop.org/show_bug.cgi?id=110519
  [fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (10 -> 9)
------------------------------

  Missing    (1): pig-hsw-4770r 


Build changes
-------------

  * Linux: CI_DRM_6244 -> Patchwork_13250

  CI_DRM_6244: d7ce900dafd424be9da576fbf3155b43ce5270ec @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5052: ff711b343c06a25ac4995ab8bd9a8bcb5ce1eb10 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13250: 1915c21b218609f12edb1f1ac752b3e81fc084c8 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13250/

* Re: [PATCH v2] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-12 14:26   ` [PATCH v2] " Chris Wilson
@ 2019-06-14  9:22     ` Mika Kuoppala
  2019-06-14  9:34       ` Chris Wilson
  0 siblings, 1 reply; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-14  9:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We need to keep the context image pinned in memory until after the GPU
> has finished writing into it. Since it continues to write as we signal
> the final breadcrumb, we need to keep it pinned until the request after
> it is complete. Currently we know the order in which requests execute on
> each engine, and so to remove that presumption we need to identify a
> request/context-switch we know must occur after our completion. Any
> request queued after the signal must imply a context switch, for
> simplicity we use a fresh request from the kernel context.
>
> The sequence of operations for keeping the context pinned until saved is:
>
>  - On context activation, we preallocate a node for each physical engine
>    the context may operate on. This is to avoid allocations during
>    unpinning, which may be from inside FS_RECLAIM context (aka the
>    shrinker)
>
>  - On context deactivation on retirement of the last active request (which
>    is before we know the context has been saved), we add the
>    preallocated node onto a barrier list on each engine
>
>  - On engine idling, we emit a switch to kernel context. When this
>    switch completes, we know that all previous contexts must have been
>    saved, and so on retiring this request we can finally unpin all the
>    contexts that were marked as deactivated prior to the switch.
>
> We can enhance this in future by flushing all the idle contexts on a
> regular heartbeat pulse of a switch to kernel context, which will also
> be used to check for hung engines.
>
> v2: intel_context_active_acquire/_release
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 24 ++----
>  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
>  drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 20 ++++-
>  drivers/gpu/drm/i915/gt/intel_context.c       | 80 ++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_context.h       |  3 +
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  6 +-
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  2 -
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 23 +-----
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  | 13 +--
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 62 ++------------
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 44 +---------
>  drivers/gpu/drm/i915/gt/mock_engine.c         | 11 +--
>  drivers/gpu/drm/i915/i915_active.c            | 78 ++++++++++++++++++
>  drivers/gpu/drm/i915/i915_active.h            |  5 ++
>  drivers/gpu/drm/i915/i915_active_types.h      |  3 +
>  drivers/gpu/drm/i915/i915_gem.c               |  4 -
>  drivers/gpu/drm/i915/i915_request.c           | 15 ----
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 -
>  19 files changed, 213 insertions(+), 184 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index c86ca9f21532..6200060aef05 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
> -	for_each_engine(engine, dev_priv, id)
> -		intel_engine_lost_context(engine);
> -}
> -
>  void i915_gem_contexts_fini(struct drm_i915_private *i915)
>  {
>  	lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>  	if (ret)
>  		goto out_add;
>  
> -	ret = gen8_emit_rpcs_config(rq, ce, sseu);
> -	if (ret)
> -		goto out_add;
> -
>  	/*
>  	 * Guarantee context image and the timeline remains pinned until the
>  	 * modifying request is retired by setting the ce activity tracker.
> @@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>  	 * But we only need to take one pin on the account of it. Or in other
>  	 * words transfer the pinned ce object to tracked active request.
>  	 */
> -	if (!i915_active_request_isset(&ce->active_tracker))
> -		__intel_context_pin(ce);
> -	__i915_active_request_set(&ce->active_tracker, rq);
> +	GEM_BUG_ON(i915_active_is_idle(&ce->active));
> +	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
> +	if (ret)
> +		goto out_add;
> +
> +	ret = gen8_emit_rpcs_config(rq, ce, sseu);
>  
>  out_add:
>  	i915_request_add(rq);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 630392c77e48..9691dd062f72 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
>  
>  /* i915_gem_context.c */
>  int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
> -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
>  void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
>  
>  int i915_gem_context_open(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 6e75702c5671..141f3ea349a4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -10,6 +10,22 @@
>  #include "i915_drv.h"
>  #include "i915_globals.h"
>  
> +static void call_idle_barriers(struct intel_engine_cs *engine)
> +{
> +	struct llist_node *node, *next;
> +
> +	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {

Seems to be null safe.
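(A minimal standalone sketch, not from the patch, of why the empty case is
harmless: llist_del_all() returns NULL for an empty list, and
llist_for_each_safe() then never enters the loop body. The drain_count()
helper below is hypothetical, purely for illustration.)

	#include <linux/llist.h>

	static int drain_count(struct llist_head *head)
	{
		struct llist_node *node, *next;
		int count = 0;

		/* llist_del_all() atomically detaches the list, or returns NULL */
		llist_for_each_safe(node, next, llist_del_all(head))
			count++; /* zero iterations if the list started empty */

		return count;
	}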

> +		struct i915_active_request *active =
> +			container_of((struct list_head *)node,
> +				     typeof(*active), link);
> +
> +		INIT_LIST_HEAD(&active->link);
> +		RCU_INIT_POINTER(active->request, NULL);
> +
> +		active->retire(active, NULL);
> +	}
> +}
> +
>  static void i915_gem_park(struct drm_i915_private *i915)
>  {
>  	struct intel_engine_cs *engine;
> @@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915)
>  
>  	lockdep_assert_held(&i915->drm.struct_mutex);
>  
> -	for_each_engine(engine, i915, id)
> +	for_each_engine(engine, i915, id) {
> +		call_idle_barriers(engine); /* cleanup after wedging */
>  		i915_gem_batch_pool_fini(&engine->batch_pool);
> +	}
>  
>  	i915_timelines_park(i915);
>  	i915_vma_parked(i915);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index c78ec0b58e77..8e299c631575 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
>  
>  		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
>  
> -		intel_context_get(ce);
>  		smp_mb__before_atomic(); /* flush pin before it is visible */
>  	}
>  
> @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
>  		ce->ops->unpin(ce);
>  
>  		i915_gem_context_put(ce->gem_context);
> -		intel_context_put(ce);
> +		intel_context_active_release(ce);

Not going to insist on any change in naming, but I was thinking
here that we arm the barriers.

>  	}
>  
>  	mutex_unlock(&ce->pin_mutex);
>  	intel_context_put(ce);
>  }
>  
> -static void intel_context_retire(struct i915_active_request *active,
> -				 struct i915_request *rq)
> +static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
>  {
> -	struct intel_context *ce =
> -		container_of(active, typeof(*ce), active_tracker);
> +	int err;

Why not ret? I have started removing errs. Am I swimming upstream? :P

>  
> -	intel_context_unpin(ce);
> +	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * And mark it as a globally pinned object to let the shrinker know
> +	 * it cannot reclaim the object until we release it.
> +	 */
> +	vma->obj->pin_global++;
> +	vma->obj->mm.dirty = true;
> +
> +	return 0;
> +}
> +
> +static void __context_unpin_state(struct i915_vma *vma)
> +{
> +	vma->obj->pin_global--;
> +	__i915_vma_unpin(vma);
> +}
> +
> +static void intel_context_retire(struct i915_active *active)
> +{
> +	struct intel_context *ce = container_of(active, typeof(*ce), active);
> +
> +	if (ce->state)
> +		__context_unpin_state(ce->state);
> +
> +	intel_context_put(ce);
>  }
>  
>  void
> @@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,
>  
>  	mutex_init(&ce->pin_mutex);
>  
> -	i915_active_request_init(&ce->active_tracker,
> -				 NULL, intel_context_retire);
> +	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
> +}
> +
> +int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
> +{
> +	int err;
> +
> +	if (!i915_active_acquire(&ce->active))
> +		return 0;
> +
> +	intel_context_get(ce);
> +
> +	if (!ce->state)
> +		return 0;
> +
> +	err = __context_pin_state(ce->state, flags);
> +	if (err) {
> +		i915_active_cancel(&ce->active);
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	/* Preallocate tracking nodes */
> +	if (!i915_gem_context_is_kernel(ce->gem_context)) {
> +		err = i915_active_acquire_preallocate_barrier(&ce->active,
> +							      ce->engine);
> +		if (err) {
> +			i915_active_release(&ce->active);

To me it looks like we are missing a context put in here.
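Roughly what I mean, as a sketch only (I have not checked how this
interacts with any retire triggered by i915_active_release(), but as
written the intel_context_get() taken above has no matching put on this
error path):

	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err) {
		i915_active_release(&ce->active);
		intel_context_put(ce); /* pair with intel_context_get() above */
		return err;
	}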


> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void intel_context_active_release(struct intel_context *ce)
> +{
> +	/* Nodes preallocated in intel_context_active() */
> +	i915_active_acquire_barrier(&ce->active);
> +	i915_active_release(&ce->active);
>  }
>  
>  static void i915_global_context_shrink(void)
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 6d5453ba2c1e..a47275bc4f01 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
>  		ce->ops->exit(ce);
>  }
>  
> +int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
> +void intel_context_active_release(struct intel_context *ce);
> +
>  static inline struct intel_context *intel_context_get(struct intel_context *ce)
>  {
>  	kref_get(&ce->ref);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 825fcf0ac9c4..e95be4be9612 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -56,10 +56,10 @@ struct intel_context {
>  	intel_engine_mask_t saturated; /* submitting semaphores too late? */
>  
>  	/**
> -	 * active_tracker: Active tracker for the external rq activity
> -	 * on this intel_context object.
> +	 * active: Active tracker for the rq activity (inc. external) on this
> +	 * intel_context object.
>  	 */
> -	struct i915_active_request active_tracker;
> +	struct i915_active active;
>  
>  	const struct intel_context_ops *ops;
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 201bbd2a4faf..b9fd88f21609 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>  
> -void intel_engine_lost_context(struct intel_engine_cs *engine);
> -
>  void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>  unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index c0d986db5a75..5a08036ae774 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>  {
>  	int err;
>  
> +	init_llist_head(&engine->barrier_tasks);
> +
>  	err = init_status_page(engine);
>  	if (err)
>  		return err;
> @@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  	if (engine->preempt_context)
>  		intel_context_unpin(engine->preempt_context);
>  	intel_context_unpin(engine->kernel_context);
> +	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
>  
>  	i915_timeline_fini(&engine->timeline);
>  
> @@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>  		engine->set_default_submission(engine);
>  }
>  
> -/**
> - * intel_engine_lost_context: called when the GPU is reset into unknown state
> - * @engine: the engine
> - *
> - * We have either reset the GPU or otherwise about to lose state tracking of
> - * the current GPU logical state (e.g. suspend). On next use, it is therefore
> - * imperative that we make no presumptions about the current state and load
> - * from scratch.
> - */
> -void intel_engine_lost_context(struct intel_engine_cs *engine)
> -{
> -	struct intel_context *ce;
> -
> -	lockdep_assert_held(&engine->i915->drm.struct_mutex);
> -
> -	ce = fetch_and_zero(&engine->last_retired_context);
> -	if (ce)
> -		intel_context_unpin(ce);
> -}
> -
>  bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
>  {
>  	switch (INTEL_GEN(engine->i915)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index ccf034764741..3c448a061abd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>  
>  	/* Check again on the next retirement. */
>  	engine->wakeref_serial = engine->serial + 1;
> +
> +	i915_request_add_barriers(rq);
>  	__i915_request_commit(rq);
>  
>  	return false;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 01223864237a..33a31aa2d2ae 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -11,6 +11,7 @@
>  #include <linux/irq_work.h>
>  #include <linux/kref.h>
>  #include <linux/list.h>
> +#include <linux/llist.h>
>  #include <linux/types.h>
>  
>  #include "i915_gem.h"
> @@ -288,6 +289,7 @@ struct intel_engine_cs {
>  	struct intel_ring *buffer;
>  
>  	struct i915_timeline timeline;
> +	struct llist_head barrier_tasks;
>  
>  	struct intel_context *kernel_context; /* pinned */
>  	struct intel_context *preempt_context; /* pinned; optional */
> @@ -435,17 +437,6 @@ struct intel_engine_cs {
>  
>  	struct intel_engine_execlists execlists;
>  
> -	/* Contexts are pinned whilst they are active on the GPU. The last
> -	 * context executed remains active whilst the GPU is idle - the
> -	 * switch away and write to the context object only occurs on the
> -	 * next execution.  Contexts are only unpinned on retirement of the
> -	 * following request ensuring that we can always write to the object
> -	 * on the context switch even after idling. Across suspend, we switch
> -	 * to the kernel context and trash it as the save may not happen
> -	 * before the hardware is powered down.
> -	 */
> -	struct intel_context *last_retired_context;
> -
>  	/* status_notifier: list of callbacks for context-switch changes */
>  	struct atomic_notifier_head context_status_notifier;
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index b8f5592da18f..d0a51752386f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
>  	intel_context_free(ce);
>  }
>  
> -static int __context_pin(struct i915_vma *vma)
> -{
> -	unsigned int flags;
> -	int err;
> -
> -	flags = PIN_GLOBAL | PIN_HIGH;
> -	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
> -
> -	err = i915_vma_pin(vma, 0, 0, flags);
> -	if (err)
> -		return err;
> -
> -	vma->obj->pin_global++;
> -	vma->obj->mm.dirty = true;
> -
> -	return 0;
> -}
> -
> -static void __context_unpin(struct i915_vma *vma)
> -{
> -	vma->obj->pin_global--;
> -	__i915_vma_unpin(vma);
> -}
> -
>  static void execlists_context_unpin(struct intel_context *ce)
>  {
> -	struct intel_engine_cs *engine;
> -
> -	/*
> -	 * The tasklet may still be using a pointer to our state, via an
> -	 * old request. However, since we know we only unpin the context
> -	 * on retirement of the following request, we know that the last
> -	 * request referencing us will have had a completion CS interrupt.
> -	 * If we see that it is still active, it means that the tasklet hasn't
> -	 * had the chance to run yet; let it run before we teardown the
> -	 * reference it may use.
> -	 */
> -	engine = READ_ONCE(ce->inflight);
> -	if (unlikely(engine)) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&engine->timeline.lock, flags);
> -		process_csb(engine);
> -		spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -
> -		GEM_BUG_ON(READ_ONCE(ce->inflight));
> -	}
> -
>  	i915_gem_context_unpin_hw_id(ce->gem_context);
> -
> -	intel_ring_unpin(ce->ring);
> -
>  	i915_gem_object_unpin_map(ce->state->obj);
> -	__context_unpin(ce->state);
> +	intel_ring_unpin(ce->ring);
>  }
>  
>  static void
> @@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce,
>  		goto err;
>  	GEM_BUG_ON(!ce->state);
>  
> -	ret = __context_pin(ce->state);
> +	ret = intel_context_active_acquire(ce,
> +					   engine->i915->ggtt.pin_bias |
> +					   PIN_OFFSET_BIAS |
> +					   PIN_HIGH);
>  	if (ret)
>  		goto err;
>  
> @@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
>  					I915_MAP_OVERRIDE);
>  	if (IS_ERR(vaddr)) {
>  		ret = PTR_ERR(vaddr);
> -		goto unpin_vma;
> +		goto unpin_active;
>  	}
>  
>  	ret = intel_ring_pin(ce->ring);
> @@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce,
>  	intel_ring_unpin(ce->ring);
>  unpin_map:
>  	i915_gem_object_unpin_map(ce->state->obj);
> -unpin_vma:
> -	__context_unpin(ce->state);
> +unpin_active:
> +	intel_context_active_release(ce);
>  err:
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index c834d016c965..7497c9ce668e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
>  		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
>  }
>  
> -static int __context_pin(struct intel_context *ce)
> -{
> -	struct i915_vma *vma;
> -	int err;
> -
> -	vma = ce->state;
> -	if (!vma)
> -		return 0;
> -
> -	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> -	if (err)
> -		return err;
> -
> -	/*
> -	 * And mark is as a globally pinned object to let the shrinker know
> -	 * it cannot reclaim the object until we release it.
> -	 */
> -	vma->obj->pin_global++;
> -	vma->obj->mm.dirty = true;
> -
> -	return 0;
> -}
> -
> -static void __context_unpin(struct intel_context *ce)
> -{
> -	struct i915_vma *vma;
> -
> -	vma = ce->state;
> -	if (!vma)
> -		return;
> -
> -	vma->obj->pin_global--;
> -	i915_vma_unpin(vma);
> -}
> -
>  static void ring_context_unpin(struct intel_context *ce)
>  {
>  	__context_unpin_ppgtt(ce->gem_context);
> -	__context_unpin(ce);
>  }
>  
>  static struct i915_vma *
> @@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce)
>  		ce->state = vma;
>  	}
>  
> -	err = __context_pin(ce);
> +	err = intel_context_active_acquire(ce, PIN_HIGH);
>  	if (err)
>  		return err;
>  
>  	err = __context_pin_ppgtt(ce->gem_context);
>  	if (err)
> -		goto err_unpin;
> +		goto err_active;
>  
>  	return 0;
>  
> -err_unpin:
> -	__context_unpin(ce);
> +err_active:
> +	intel_context_active_release(ce);
>  	return err;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 6d7562769eb2..d1ef515bac8d 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
>  
>  static int mock_context_pin(struct intel_context *ce)
>  {
> +	int ret;
> +
>  	if (!ce->ring) {
>  		ce->ring = mock_ring(ce->engine);
>  		if (!ce->ring)
>  			return -ENOMEM;
>  	}
>  
> +	ret = intel_context_active_acquire(ce, PIN_HIGH);
> +	if (ret)
> +		return ret;
> +
>  	mock_timeline_pin(ce->ring->timeline);
>  	return 0;
>  }
> @@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
>  {
>  	struct mock_engine *mock =
>  		container_of(engine, typeof(*mock), base);
> -	struct intel_context *ce;
>  
>  	GEM_BUG_ON(timer_pending(&mock->hw_delay));
>  
> -	ce = fetch_and_zero(&engine->last_retired_context);
> -	if (ce)
> -		intel_context_unpin(ce);
> -
>  	intel_context_unpin(engine->kernel_context);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 863ae12707ba..2d019ac6db20 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915,
>  	ref->retire = retire;
>  	ref->tree = RB_ROOT;
>  	i915_active_request_init(&ref->last, NULL, last_retire);
> +	init_llist_head(&ref->barriers);
>  	ref->count = 0;
>  }
>  
> @@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref)
>  }
>  #endif
>  
> +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> +					    struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *i915 = engine->i915;
> +	unsigned long tmp;
> +	int err = 0;
> +
> +	GEM_BUG_ON(!engine->mask);
> +	for_each_engine_masked(engine, i915, engine->mask, tmp) {
> +		struct intel_context *kctx = engine->kernel_context;
> +		struct active_node *node;
> +
> +		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
> +		if (unlikely(!node)) {
> +			err = -ENOMEM;
> +			break;
> +		}
> +
> +		i915_active_request_init(&node->base,
> +					 (void *)engine, node_retire);
> +		node->timeline = kctx->ring->timeline->fence_context;
> +		node->ref = ref;
> +		ref->count++;
> +
> +		llist_add((struct llist_node *)&node->base.link,
> +			  &ref->barriers);
> +	}
> +
> +	return err;
> +}
> +
> +void i915_active_acquire_barrier(struct i915_active *ref)
> +{
> +	struct llist_node *pos, *next;
> +
> +	i915_active_acquire(ref);
> +
> +	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
> +		struct intel_engine_cs *engine;
> +		struct active_node *node;
> +		struct rb_node **p, *parent;
> +
> +		node = container_of((struct list_head *)pos,
> +				    typeof(*node), base.link);
> +
> +		engine = (void *)rcu_access_pointer(node->base.request);
> +		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
> +
> +		parent = NULL;
> +		p = &ref->tree.rb_node;
> +		while (*p) {
> +			parent = *p;
> +			if (rb_entry(parent,
> +				     struct active_node,
> +				     node)->timeline < node->timeline)
> +				p = &parent->rb_right;
> +			else
> +				p = &parent->rb_left;
> +		}
> +		rb_link_node(&node->node, parent, p);
> +		rb_insert_color(&node->node, &ref->tree);
> +
> +		llist_add((struct llist_node *)&node->base.link,
> +			  &engine->barrier_tasks);
> +	}
> +	i915_active_release(ref);
> +}
> +
> +void i915_request_add_barriers(struct i915_request *rq)
> +{
> +	struct intel_engine_cs *engine = rq->engine;
> +	struct llist_node *node, *next;
> +
> +	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
> +		list_add_tail((struct list_head *)node, &rq->active_list);
> +}
> +
>  int i915_active_request_set(struct i915_active_request *active,
>  			    struct i915_request *rq)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 7d758719ce39..d55d37673944 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
>  static inline void i915_active_fini(struct i915_active *ref) { }
>  #endif
>  
> +int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
> +					    struct intel_engine_cs *engine);
> +void i915_active_acquire_barrier(struct i915_active *ref);
> +void i915_request_add_barriers(struct i915_request *rq);
> +
>  #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index b679253b53a5..c025991b9233 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -7,6 +7,7 @@
>  #ifndef _I915_ACTIVE_TYPES_H_
>  #define _I915_ACTIVE_TYPES_H_
>  
> +#include <linux/llist.h>
>  #include <linux/rbtree.h>
>  #include <linux/rcupdate.h>
>  
> @@ -31,6 +32,8 @@ struct i915_active {
>  	unsigned int count;
>  
>  	void (*retire)(struct i915_active *ref);
> +
> +	struct llist_head barriers;

This looks like it is generic. Are you planning to extend?

/* Preallocated slots of per engine barriers */

-Mika

>  };
>  
>  #endif /* _I915_ACTIVE_TYPES_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0d9282b673de..c0f5a00b659a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  
>  	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>  	intel_runtime_pm_put(i915, wakeref);
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	i915_gem_contexts_lost(i915);
> -	mutex_unlock(&i915->drm.struct_mutex);
>  }
>  
>  void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 1cbc3ef4fc27..c2802bbb0cf6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
>  	spin_unlock(&rq->lock);
>  
>  	local_irq_enable();
> -
> -	/*
> -	 * The backing object for the context is done after switching to the
> -	 * *next* context. Therefore we cannot retire the previous context until
> -	 * the next context has already started running. However, since we
> -	 * cannot take the required locks at i915_request_submit() we
> -	 * defer the unpinning of the active context to now, retirement of
> -	 * the subsequent request.
> -	 */
> -	if (engine->last_retired_context)
> -		intel_context_unpin(engine->last_retired_context);
> -	engine->last_retired_context = rq->hw_context;
>  }
>  
>  static void __retire_engine_upto(struct intel_engine_cs *engine,
> @@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>  
>  	rq->infix = rq->ring->emit; /* end of header; start of user payload */
>  
> -	/* Keep a second pin for the dual retirement along engine and ring */
> -	__intel_context_pin(ce);
> -
>  	intel_context_mark_active(ce);
>  	return rq;
>  
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 1e9ffced78c1..35c92d1db198 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  	mock_device_flush(i915);
> -	i915_gem_contexts_lost(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	flush_work(&i915->gem.idle_work);
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-14  9:22     ` Mika Kuoppala
@ 2019-06-14  9:34       ` Chris Wilson
  2019-06-14 10:18         ` Mika Kuoppala
  0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2019-06-14  9:34 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-06-14 10:22:16)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> > index c78ec0b58e77..8e299c631575 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
> >  
> >               i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
> >  
> > -             intel_context_get(ce);
> >               smp_mb__before_atomic(); /* flush pin before it is visible */
> >       }
> >  
> > @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
> >               ce->ops->unpin(ce);
> >  
> >               i915_gem_context_put(ce->gem_context);
> > -             intel_context_put(ce);
> > +             intel_context_active_release(ce);
> 
> Not going to insist any change in naming but I was thinking
> here that we arm the barriers.

Not keen, not for changing just _release as we end up with _acquire/_arm
and that does not seem symmetrical.

_release_deferred() _release_barrier() perhaps, but no need to
differentiate yet. _release_barrier() winning so far.

> >       mutex_unlock(&ce->pin_mutex);
> >       intel_context_put(ce);
> >  }
> >  
> > -static void intel_context_retire(struct i915_active_request *active,
> > -                              struct i915_request *rq)
> > +static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
> >  {
> > -     struct intel_context *ce =
> > -             container_of(active, typeof(*ce), active_tracker);
> > +     int err;
> 
> Why not ret? I have started to removing errs. Am I swimming in upstream? :P

We've been replacing ret with err (where it makes more sense to ask "if
(error) do error_path;") for a few years. :-p
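
A generic sketch of that convention, for illustration only -- this is not
code from the series, and setup_next_stage() is a made-up placeholder:

        int err;

        err = i915_vma_pin(vma, 0, 0, flags);  /* "err" answers: did this step fail? */
        if (err)
                return err;

        err = setup_next_stage();               /* hypothetical follow-on step */
        if (err)
                goto err_unpin;

        return 0;

err_unpin:
        i915_vma_unpin(vma);
        return err;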

> > -     intel_context_unpin(ce);
> > +     err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
> > +     if (err)
> > +             return err;
> > +
> > +     /*
> > +      * And mark it as a globally pinned object to let the shrinker know
> > +      * it cannot reclaim the object until we release it.
> > +      */
> > +     vma->obj->pin_global++;
> > +     vma->obj->mm.dirty = true;
> > +
> > +     return 0;
> > +}

> > +int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
> > +{
> > +     int err;
> > +
> > +     if (!i915_active_acquire(&ce->active))
> > +             return 0;
> > +
> > +     intel_context_get(ce);
> > +
> > +     if (!ce->state)
> > +             return 0;
> > +
> > +     err = __context_pin_state(ce->state, flags);
> > +     if (err) {
> > +             i915_active_cancel(&ce->active);
> > +             intel_context_put(ce);
> > +             return err;
> > +     }
> > +
> > +     /* Preallocate tracking nodes */
> > +     if (!i915_gem_context_is_kernel(ce->gem_context)) {
> > +             err = i915_active_acquire_preallocate_barrier(&ce->active,
> > +                                                           ce->engine);
> > +             if (err) {
> > +                     i915_active_release(&ce->active);
> 
> For me it looks like we are missing context put in here.

Crazy huh :) We are at the point where it is safer to release than
unwind; i915_active_cancel is quite ugly.

It does get a bit simpler later on when we rewrite i915_active and
drive this as an acquire callback.

> > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> > index b679253b53a5..c025991b9233 100644
> > --- a/drivers/gpu/drm/i915/i915_active_types.h
> > +++ b/drivers/gpu/drm/i915/i915_active_types.h
> > @@ -7,6 +7,7 @@
> >  #ifndef _I915_ACTIVE_TYPES_H_
> >  #define _I915_ACTIVE_TYPES_H_
> >  
> > +#include <linux/llist.h>
> >  #include <linux/rbtree.h>
> >  #include <linux/rcupdate.h>
> >  
> > @@ -31,6 +32,8 @@ struct i915_active {
> >       unsigned int count;
> >  
> >       void (*retire)(struct i915_active *ref);
> > +
> > +     struct llist_head barriers;
> 
> This looks like it is generic. Are you planning to extend?

Only user so far. But i915_active is our answer to
reservation_object on steroids, so it should itself be quite generic.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
  2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
                   ` (14 preceding siblings ...)
  2019-06-13  6:16 ` ✗ Fi.CI.IGT: failure for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Patchwork
@ 2019-06-14  9:58 ` Patchwork
  15 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2019-06-14  9:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2)
URL   : https://patchwork.freedesktop.org/series/61946/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6251_full -> Patchwork_13254_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_13254_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_13254_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_13254_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_engines@execute-one:
    - shard-skl:          [PASS][1] -> [FAIL][2] +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl10/igt@gem_ctx_engines@execute-one.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl1/igt@gem_ctx_engines@execute-one.html
    - shard-apl:          [PASS][3] -> [FAIL][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl2/igt@gem_ctx_engines@execute-one.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl6/igt@gem_ctx_engines@execute-one.html
    - shard-glk:          [PASS][5] -> [FAIL][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk3/igt@gem_ctx_engines@execute-one.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk1/igt@gem_ctx_engines@execute-one.html

  * igt@gem_exec_await@wide-contexts:
    - shard-kbl:          [PASS][7] -> [FAIL][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-kbl1/igt@gem_exec_await@wide-contexts.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-kbl7/igt@gem_exec_await@wide-contexts.html

  

### Piglit changes ###

#### Possible regressions ####

  * spec@arb_shader_image_load_store@shader-mem-barrier (NEW):
    - pig-glk-j5005:      NOTRUN -> [FAIL][9] +2 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/pig-glk-j5005/spec@arb_shader_image_load_store@shader-mem-barrier.html
    - pig-skl-6260u:      NOTRUN -> [FAIL][10]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/pig-skl-6260u/spec@arb_shader_image_load_store@shader-mem-barrier.html

  
New tests
---------

  New tests have been introduced between CI_DRM_6251_full and Patchwork_13254_full:

### New Piglit tests (3) ###

  * spec@arb_shader_image_load_store@shader-mem-barrier:
    - Statuses : 2 fail(s)
    - Exec time: [0.16, 0.20] s

  * spec@ext_transform_feedback@order arrays points:
    - Statuses : 1 fail(s)
    - Exec time: [0.13] s

  * spec@glsl-1.30@execution@fs-execution-ordering:
    - Statuses : 1 fail(s)
    - Exec time: [0.62] s

  

Known issues
------------

  Here are the changes found in Patchwork_13254_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_persistent_relocs@forked-faulting-reloc-thrashing:
    - shard-snb:          [PASS][11] -> [DMESG-WARN][12] ([fdo#110789] / [fdo#110913 ])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-snb5/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-snb6/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html

  * igt@gem_persistent_relocs@forked-thrashing:
    - shard-hsw:          [PASS][13] -> [DMESG-WARN][14] ([fdo#110789] / [fdo#110913 ])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-hsw6/igt@gem_persistent_relocs@forked-thrashing.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-hsw8/igt@gem_persistent_relocs@forked-thrashing.html
    - shard-kbl:          [PASS][15] -> [DMESG-WARN][16] ([fdo#110913 ])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-kbl2/igt@gem_persistent_relocs@forked-thrashing.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-kbl3/igt@gem_persistent_relocs@forked-thrashing.html

  * igt@gem_pwrite@big-cpu-random:
    - shard-glk:          [PASS][17] -> [INCOMPLETE][18] ([fdo#103359] / [k.org#198133])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk3/igt@gem_pwrite@big-cpu-random.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk9/igt@gem_pwrite@big-cpu-random.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-hsw:          [PASS][19] -> [FAIL][20] ([fdo#108686])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-hsw7/igt@gem_tiled_swapping@non-threaded.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-hsw1/igt@gem_tiled_swapping@non-threaded.html

  * igt@gem_userptr_blits@map-fixed-invalidate-busy:
    - shard-apl:          [PASS][21] -> [DMESG-WARN][22] ([fdo#110913 ])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl8/igt@gem_userptr_blits@map-fixed-invalidate-busy.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl8/igt@gem_userptr_blits@map-fixed-invalidate-busy.html

  * igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy:
    - shard-glk:          [PASS][23] -> [DMESG-WARN][24] ([fdo#110913 ]) +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk3/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk3/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy.html

  * igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy-gup:
    - shard-skl:          [PASS][25] -> [DMESG-WARN][26] ([fdo#110913 ])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl9/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy-gup.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl6/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy-gup.html

  * igt@i915_pm_rc6_residency@rc6-accuracy:
    - shard-kbl:          [PASS][27] -> [SKIP][28] ([fdo#109271])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-kbl1/igt@i915_pm_rc6_residency@rc6-accuracy.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-kbl7/igt@i915_pm_rc6_residency@rc6-accuracy.html

  * igt@kms_atomic_transition@plane-all-transition-nonblocking-fencing:
    - shard-apl:          [PASS][29] -> [INCOMPLETE][30] ([fdo#103927])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl1/igt@kms_atomic_transition@plane-all-transition-nonblocking-fencing.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl1/igt@kms_atomic_transition@plane-all-transition-nonblocking-fencing.html

  * igt@kms_big_fb@x-tiled-32bpp-rotate-0:
    - shard-snb:          [PASS][31] -> [SKIP][32] ([fdo#109271]) +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-snb6/igt@kms_big_fb@x-tiled-32bpp-rotate-0.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-snb5/igt@kms_big_fb@x-tiled-32bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-c-cursor-256x85-offscreen:
    - shard-skl:          [PASS][33] -> [FAIL][34] ([fdo#103232])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl10/igt@kms_cursor_crc@pipe-c-cursor-256x85-offscreen.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl1/igt@kms_cursor_crc@pipe-c-cursor-256x85-offscreen.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [PASS][35] -> [SKIP][36] ([fdo#109349])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb3/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_flip@2x-flip-vs-absolute-wf_vblank:
    - shard-hsw:          [PASS][37] -> [SKIP][38] ([fdo#109271]) +19 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-hsw8/igt@kms_flip@2x-flip-vs-absolute-wf_vblank.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-hsw1/igt@kms_flip@2x-flip-vs-absolute-wf_vblank.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-pwrite:
    - shard-iclb:         [PASS][39] -> [FAIL][40] ([fdo#103167]) +6 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-pwrite.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb1/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-pwrite.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [PASS][41] -> [FAIL][42] ([fdo#108145]) +1 similar issue
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl9/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][43] -> [FAIL][44] ([fdo#108145] / [fdo#110403])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl10/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl1/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-iclb:         [PASS][45] -> [FAIL][46] ([fdo#103166])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb7/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb7/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [PASS][47] -> [FAIL][48] ([fdo#108341])
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb4/igt@kms_psr@no_drrs.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb1/igt@kms_psr@no_drrs.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [PASS][49] -> [SKIP][50] ([fdo#109441]) +3 similar issues
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb4/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_setmode@basic:
    - shard-glk:          [PASS][51] -> [FAIL][52] ([fdo#99912])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk2/igt@kms_setmode@basic.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk2/igt@kms_setmode@basic.html

  * igt@kms_sysfs_edid_timing:
    - shard-iclb:         [PASS][53] -> [FAIL][54] ([fdo#100047])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb4/igt@kms_sysfs_edid_timing.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb2/igt@kms_sysfs_edid_timing.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-apl:          [PASS][55] -> [DMESG-WARN][56] ([fdo#108566]) +3 similar issues
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl2/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl6/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  
#### Possible fixes ####

  * igt@gem_eio@in-flight-external:
    - shard-glk:          [DMESG-WARN][57] ([fdo#110913 ]) -> [PASS][58] +1 similar issue
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk2/igt@gem_eio@in-flight-external.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk6/igt@gem_eio@in-flight-external.html

  * igt@gem_exec_schedule@semaphore-resolve:
    - shard-skl:          [FAIL][59] ([fdo#110519]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl10/igt@gem_exec_schedule@semaphore-resolve.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl1/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-kbl:          [FAIL][61] ([fdo#110519]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-kbl7/igt@gem_exec_schedule@semaphore-resolve.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-kbl6/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-apl:          [FAIL][63] ([fdo#110519]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl2/igt@gem_exec_schedule@semaphore-resolve.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl6/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-glk:          [FAIL][65] ([fdo#110519]) -> [PASS][66]
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk3/igt@gem_exec_schedule@semaphore-resolve.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk1/igt@gem_exec_schedule@semaphore-resolve.html
    - shard-iclb:         [FAIL][67] ([fdo#110519]) -> [PASS][68]
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb6/igt@gem_exec_schedule@semaphore-resolve.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb4/igt@gem_exec_schedule@semaphore-resolve.html

  * igt@gem_persistent_relocs@forked-faulting-reloc-thrashing:
    - shard-hsw:          [DMESG-WARN][69] ([fdo#110789] / [fdo#110913 ]) -> [PASS][70]
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-hsw8/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-hsw5/igt@gem_persistent_relocs@forked-faulting-reloc-thrashing.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-glk:          [DMESG-WARN][71] ([fdo#108686]) -> [PASS][72]
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-glk4/igt@gem_tiled_swapping@non-threaded.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-glk8/igt@gem_tiled_swapping@non-threaded.html

  * igt@gem_userptr_blits@map-fixed-invalidate-busy:
    - shard-skl:          [DMESG-WARN][73] ([fdo#110913 ]) -> [PASS][74] +1 similar issue
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl7/igt@gem_userptr_blits@map-fixed-invalidate-busy.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl4/igt@gem_userptr_blits@map-fixed-invalidate-busy.html

  * igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy:
    - shard-snb:          [DMESG-WARN][75] ([fdo#110913 ]) -> [PASS][76]
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-snb1/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-snb1/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy.html
    - shard-kbl:          [DMESG-WARN][77] ([fdo#110913 ]) -> [PASS][78]
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-kbl6/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-kbl2/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy.html

  * igt@gem_userptr_blits@sync-unmap-cycles:
    - shard-apl:          [DMESG-WARN][79] ([fdo#110913 ]) -> [PASS][80] +1 similar issue
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl6/igt@gem_userptr_blits@sync-unmap-cycles.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl6/igt@gem_userptr_blits@sync-unmap-cycles.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-ytiled:
    - shard-iclb:         [FAIL][81] ([fdo#103184] / [fdo#103232]) -> [PASS][82]
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-ytiled.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-ytiled.html

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-skl:          [FAIL][83] ([fdo#105363]) -> [PASS][84] +1 similar issue
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl7/igt@kms_flip@flip-vs-expired-vblank.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl8/igt@kms_flip@flip-vs-expired-vblank.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-skl:          [INCOMPLETE][85] ([fdo#109507]) -> [PASS][86]
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-skl1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-skl5/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-pwrite:
    - shard-hsw:          [SKIP][87] ([fdo#109271]) -> [PASS][88] +21 similar issues
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-hsw1/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-pwrite.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-hsw2/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-kbl:          [DMESG-WARN][89] ([fdo#108566]) -> [PASS][90]
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-kbl1/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-kbl2/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-render:
    - shard-iclb:         [FAIL][91] ([fdo#103167]) -> [PASS][92] +3 similar issues
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-render.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-render.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-apl:          [DMESG-WARN][93] ([fdo#108566]) -> [PASS][94]
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-apl7/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-apl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [SKIP][95] ([fdo#109642]) -> [PASS][96]
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb3/igt@kms_psr2_su@page_flip.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb2/igt@kms_psr2_su@page_flip.html

  * igt@kms_psr@psr2_no_drrs:
    - shard-iclb:         [SKIP][97] ([fdo#109441]) -> [PASS][98]
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6251/shard-iclb4/igt@kms_psr@psr2_no_drrs.html
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/shard-iclb2/igt@kms_psr@psr2_no_drrs.html

  
  [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108341]: https://bugs.freedesktop.org/show_bug.cgi?id=108341
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13254/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-06-14  9:34       ` Chris Wilson
@ 2019-06-14 10:18         ` Mika Kuoppala
  0 siblings, 0 replies; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-14 10:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-06-14 10:22:16)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> > index c78ec0b58e77..8e299c631575 100644
>> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> > @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)
>> >  
>> >               i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
>> >  
>> > -             intel_context_get(ce);
>> >               smp_mb__before_atomic(); /* flush pin before it is visible */
>> >       }
>> >  
>> > @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
>> >               ce->ops->unpin(ce);
>> >  
>> >               i915_gem_context_put(ce->gem_context);
>> > -             intel_context_put(ce);
>> > +             intel_context_active_release(ce);
>> 
>> Not going to insist any change in naming but I was thinking
>> here that we arm the barriers.
>
> Not keen, not for changing just _release as we end up with _acquire/_arm
> and that does not seem symmetrical.
>
> _release_deferred() _release_barrier() perhaps, but no need to
> differentiate yet. _release_barrier() winning so far.
>
>> >       mutex_unlock(&ce->pin_mutex);
>> >       intel_context_put(ce);
>> >  }
>> >  
>> > -static void intel_context_retire(struct i915_active_request *active,
>> > -                              struct i915_request *rq)
>> > +static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
>> >  {
>> > -     struct intel_context *ce =
>> > -             container_of(active, typeof(*ce), active_tracker);
>> > +     int err;
>> 
>> Why not ret? I have started to removing errs. Am I swimming in upstream? :P
>
> We've been replacing ret with err (where it makes more sense to ask "if
> (error) do error_path;") for a few years. :-p
>
>> > -     intel_context_unpin(ce);
>> > +     err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
>> > +     if (err)
>> > +             return err;
>> > +
>> > +     /*
>> > +      * And mark it as a globally pinned object to let the shrinker know
>> > +      * it cannot reclaim the object until we release it.
>> > +      */
>> > +     vma->obj->pin_global++;
>> > +     vma->obj->mm.dirty = true;
>> > +
>> > +     return 0;
>> > +}
>
>> > +int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
>> > +{
>> > +     int err;
>> > +
>> > +     if (!i915_active_acquire(&ce->active))
>> > +             return 0;
>> > +
>> > +     intel_context_get(ce);
>> > +
>> > +     if (!ce->state)
>> > +             return 0;
>> > +
>> > +     err = __context_pin_state(ce->state, flags);
>> > +     if (err) {
>> > +             i915_active_cancel(&ce->active);
>> > +             intel_context_put(ce);
>> > +             return err;
>> > +     }
>> > +
>> > +     /* Preallocate tracking nodes */
>> > +     if (!i915_gem_context_is_kernel(ce->gem_context)) {
>> > +             err = i915_active_acquire_preallocate_barrier(&ce->active,
>> > +                                                           ce->engine);
>> > +             if (err) {
>> > +                     i915_active_release(&ce->active);
>> 
>> For me it looks like we are missing context put in here.
>
> Crazy huh :) We are at the point where it is safer to release than
> unwind; i915_active_cancel is quite ugly.
>

Ok so the retirement of active releases the context ref we have.

And you add to the ref->count on moving to the barriers list so
partially done engine masks should still be covered.
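
A minimal sketch of that ownership flow, assuming the retire callback looks
roughly like the below (hypothetical name and body, inferred from the hunks
quoted in this thread rather than copied from the patch); the final
i915_active_release() on the error path ends up here and drops the reference:

        static void hypothetical_context_retire(struct i915_active *ref)
        {
                struct intel_context *ce = container_of(ref, typeof(*ce), active);

                /* assumed counterpart of __context_pin_state() */
                if (ce->state)
                        __context_unpin_state(ce->state);

                /* balances intel_context_get() in intel_context_active_acquire() */
                intel_context_put(ce);
        }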

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> It does get a bit simpler later on when we rewrite i915_active and
> drive this as an acquire callback.
>
>> > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
>> > index b679253b53a5..c025991b9233 100644
>> > --- a/drivers/gpu/drm/i915/i915_active_types.h
>> > +++ b/drivers/gpu/drm/i915/i915_active_types.h
>> > @@ -7,6 +7,7 @@
>> >  #ifndef _I915_ACTIVE_TYPES_H_
>> >  #define _I915_ACTIVE_TYPES_H_
>> >  
>> > +#include <linux/llist.h>
>> >  #include <linux/rbtree.h>
>> >  #include <linux/rcupdate.h>
>> >  
>> > @@ -31,6 +32,8 @@ struct i915_active {
>> >       unsigned int count;
>> >  
>> >       void (*retire)(struct i915_active *ref);
>> > +
>> > +     struct llist_head barriers;
>> 
>> This looks like it is generic. Are you planning to extend?
>
> Only user so far. But i915_active is our answer to
> reservation_object on steroids, so it should itself be quite generic.
> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/8] drm/i915: Stop retiring along engine
  2019-06-12  9:31 ` [PATCH 2/8] drm/i915: Stop retiring along engine Chris Wilson
@ 2019-06-14 14:23   ` Mika Kuoppala
  0 siblings, 0 replies; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-14 14:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We no longer track the execution order along the engine and so no longer
> need to enforce ordering of retire along the engine.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_request.c | 128 +++++++++++-----------------
>  1 file changed, 52 insertions(+), 76 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9eff9de7fa10..9c58ae6e4afb 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -183,72 +183,23 @@ static void free_capture_list(struct i915_request *request)
>  	}
>  }
>  
> -static void __retire_engine_request(struct intel_engine_cs *engine,
> -				    struct i915_request *rq)
> -{
> -	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
> -		  __func__, engine->name,
> -		  rq->fence.context, rq->fence.seqno,
> -		  hwsp_seqno(rq));
> -
> -	GEM_BUG_ON(!i915_request_completed(rq));
> -
> -	local_irq_disable();
> -
> -	spin_lock(&engine->timeline.lock);
> -	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
> -	list_del_init(&rq->link);
> -	spin_unlock(&engine->timeline.lock);
> -
> -	spin_lock(&rq->lock);
> -	i915_request_mark_complete(rq);
> -	if (!i915_request_signaled(rq))
> -		dma_fence_signal_locked(&rq->fence);
> -	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
> -		i915_request_cancel_breadcrumb(rq);
> -	if (rq->waitboost) {
> -		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
> -		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
> -	}
> -	spin_unlock(&rq->lock);
> -
> -	local_irq_enable();
> -}
> -
> -static void __retire_engine_upto(struct intel_engine_cs *engine,
> -				 struct i915_request *rq)
> -{
> -	struct i915_request *tmp;
> -
> -	if (list_empty(&rq->link))
> -		return;
> -
> -	do {
> -		tmp = list_first_entry(&engine->timeline.requests,
> -				       typeof(*tmp), link);
> -
> -		GEM_BUG_ON(tmp->engine != engine);
> -		__retire_engine_request(engine, tmp);
> -	} while (tmp != rq);
> -}
> -
> -static void i915_request_retire(struct i915_request *request)
> +static bool i915_request_retire(struct i915_request *rq)
>  {
>  	struct i915_active_request *active, *next;
>  
> -	GEM_TRACE("%s fence %llx:%lld, current %d\n",
> -		  request->engine->name,
> -		  request->fence.context, request->fence.seqno,
> -		  hwsp_seqno(request));
> +	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> +	if (!i915_request_completed(rq))
> +		return false;
>  
> -	lockdep_assert_held(&request->i915->drm.struct_mutex);
> -	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
> -	GEM_BUG_ON(!i915_request_completed(request));
> +	GEM_TRACE("%s fence %llx:%lld, current %d\n",
> +		  rq->engine->name,
> +		  rq->fence.context, rq->fence.seqno,
> +		  hwsp_seqno(rq));
>  
> -	trace_i915_request_retire(request);
> +	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
> +	trace_i915_request_retire(rq);
>  
> -	advance_ring(request);
> -	free_capture_list(request);
> +	advance_ring(rq);
>  
>  	/*
>  	 * Walk through the active list, calling retire on each. This allows
> @@ -260,7 +211,7 @@ static void i915_request_retire(struct i915_request *request)
>  	 * pass along the auxiliary information (to avoid dereferencing
>  	 * the node after the callback).
>  	 */
> -	list_for_each_entry_safe(active, next, &request->active_list, link) {
> +	list_for_each_entry_safe(active, next, &rq->active_list, link) {
>  		/*
>  		 * In microbenchmarks or focusing upon time inside the kernel,
>  		 * we may spend an inordinate amount of time simply handling
> @@ -276,18 +227,39 @@ static void i915_request_retire(struct i915_request *request)
>  		INIT_LIST_HEAD(&active->link);
>  		RCU_INIT_POINTER(active->request, NULL);
>  
> -		active->retire(active, request);
> +		active->retire(active, rq);
> +	}
> +
> +	local_irq_disable();
> +
> +	spin_lock(&rq->engine->timeline.lock);
> +	list_del(&rq->link);
> +	spin_unlock(&rq->engine->timeline.lock);
> +
> +	spin_lock(&rq->lock);
> +	i915_request_mark_complete(rq);
> +	if (!i915_request_signaled(rq))
> +		dma_fence_signal_locked(&rq->fence);
> +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
> +		i915_request_cancel_breadcrumb(rq);
> +	if (rq->waitboost) {
> +		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
> +		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
>  	}
> +	spin_unlock(&rq->lock);
> +
> +	local_irq_enable();
>  
> -	i915_request_remove_from_client(request);
> +	intel_context_exit(rq->hw_context);
> +	intel_context_unpin(rq->hw_context);
>  
> -	__retire_engine_upto(request->engine, request);
> +	i915_request_remove_from_client(rq);
>  
> -	intel_context_exit(request->hw_context);
> -	intel_context_unpin(request->hw_context);
> +	free_capture_list(rq);
> +	i915_sched_node_fini(&rq->sched);
> +	i915_request_put(rq);
>  
> -	i915_sched_node_fini(&request->sched);
> -	i915_request_put(request);
> +	return true;
>  }
>  
>  void i915_request_retire_upto(struct i915_request *rq)
> @@ -309,9 +281,7 @@ void i915_request_retire_upto(struct i915_request *rq)
>  	do {
>  		tmp = list_first_entry(&ring->request_list,
>  				       typeof(*tmp), ring_link);
> -
> -		i915_request_retire(tmp);
> -	} while (tmp != rq);
> +	} while (i915_request_retire(tmp) && tmp != rq);

The semantics do change a little for this function (the loop can now
stop before reaching rq if it hits an incomplete request), but looking
at the callsites it doesn't matter.

>  }
>  
>  static void irq_execute_cb(struct irq_work *wrk)
> @@ -600,12 +570,9 @@ static void ring_retire_requests(struct intel_ring *ring)
>  {
>  	struct i915_request *rq, *rn;
>  
> -	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) {
> -		if (!i915_request_completed(rq))
> +	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
> +		if (!i915_request_retire(rq))
>  			break;
> -
> -		i915_request_retire(rq);
> -	}
>  }
>  
>  static noinline struct i915_request *
> @@ -620,6 +587,15 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
>  	if (!gfpflags_allow_blocking(gfp))
>  		goto out;
>  
> +	/* Move our oldest request to the slab-cache (if not in use!) */
> +	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
> +	i915_request_retire(rq);

Ok, this is just to kick retirement of the oldest request.

> +
> +	rq = kmem_cache_alloc(global.slab_requests,
> +			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);

Only one callsite for this, so you could go cleaner by using gfp only,
if you so desire.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +	if (rq)
> +		return rq;
> +
>  	/* Ratelimit ourselves to prevent oom from malicious clients */
>  	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
>  	cond_synchronize_rcu(rq->rcustate);
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list
  2019-06-12  9:31 ` [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list Chris Wilson
@ 2019-06-14 14:34   ` Mika Kuoppala
  2019-06-14 14:44     ` Chris Wilson
  2019-06-14 15:50   ` Mika Kuoppala
  1 sibling, 1 reply; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-14 14:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> To continue the onslaught of removing the assumption of a global
> execution ordering, another casualty is the engine->timeline. Without an
> actual timeline to track, it is overkill and we can replace it with a
> much less grand plain list. We still need a list of requests inflight,
> for the simple purpose of finding inflight requests (for retiring,
> resetting, preemption etc).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  6 ++
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 62 ++++++------
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  6 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 95 ++++++++++---------
>  drivers/gpu/drm/i915/gt/intel_reset.c         | 10 +-
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 15 ++-
>  drivers/gpu/drm/i915/gt/mock_engine.c         | 18 ++--
>  drivers/gpu/drm/i915/i915_gpu_error.c         |  5 +-
>  drivers/gpu/drm/i915/i915_request.c           | 43 +++------
>  drivers/gpu/drm/i915/i915_request.h           |  2 +-
>  drivers/gpu/drm/i915/i915_scheduler.c         | 38 ++++----
>  drivers/gpu/drm/i915/i915_timeline.c          |  1 -
>  drivers/gpu/drm/i915/i915_timeline.h          | 19 ----
>  drivers/gpu/drm/i915/i915_timeline_types.h    |  4 -
>  drivers/gpu/drm/i915/intel_guc_submission.c   | 16 ++--
>  .../gpu/drm/i915/selftests/mock_timeline.c    |  1 -
>  16 files changed, 153 insertions(+), 188 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index b9fd88f21609..6be607e9c084 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -564,4 +564,10 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
>  
>  #endif
>  
> +void intel_engine_init_active(struct intel_engine_cs *engine,
> +			      unsigned int subclass);
> +#define ENGINE_PHYSICAL	0
> +#define ENGINE_MOCK	1
> +#define ENGINE_VIRTUAL	2
> +
>  #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 5a08036ae774..01f50cfd517c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -617,14 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>  	if (err)
>  		return err;
>  
> -	err = i915_timeline_init(engine->i915,
> -				 &engine->timeline,
> -				 engine->status_page.vma);
> -	if (err)
> -		goto err_hwsp;
> -
> -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> -
> +	intel_engine_init_active(engine, ENGINE_PHYSICAL);
>  	intel_engine_init_breadcrumbs(engine);
>  	intel_engine_init_execlists(engine);
>  	intel_engine_init_hangcheck(engine);
> @@ -637,10 +630,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>  		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
>  
>  	return 0;
> -
> -err_hwsp:
> -	cleanup_status_page(engine);
> -	return err;
>  }
>  
>  /**
> @@ -797,6 +786,27 @@ static int pin_context(struct i915_gem_context *ctx,
>  	return 0;
>  }
>  
> +void
> +intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
> +{
> +	INIT_LIST_HEAD(&engine->active.requests);
> +
> +	spin_lock_init(&engine->active.lock);
> +	lockdep_set_subclass(&engine->active.lock, subclass);
> +
> +	/*
> +	 * Due to an interesting quirk in lockdep's internal debug tracking,
> +	 * after setting a subclass we must ensure the lock is used. Otherwise,
> +	 * nr_unused_locks is incremented once too often.
> +	 */
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	local_irq_disable();
> +	lock_map_acquire(&engine->active.lock.dep_map);
> +	lock_map_release(&engine->active.lock.dep_map);
> +	local_irq_enable();
> +#endif
> +}
> +
>  /**
>   * intel_engines_init_common - initialize cengine state which might require hw access
>   * @engine: Engine to initialize.
> @@ -860,6 +870,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>   */
>  void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  {
> +	GEM_BUG_ON(!list_empty(&engine->active.requests));
> +
>  	cleanup_status_page(engine);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> @@ -874,8 +886,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  	intel_context_unpin(engine->kernel_context);
>  	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
>  
> -	i915_timeline_fini(&engine->timeline);
> -
>  	intel_wa_list_free(&engine->ctx_wa_list);
>  	intel_wa_list_free(&engine->wa_list);
>  	intel_wa_list_free(&engine->whitelist);
> @@ -1482,16 +1492,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  
>  	drm_printf(m, "\tRequests:\n");
>  
> -	rq = list_first_entry(&engine->timeline.requests,
> -			      struct i915_request, link);
> -	if (&rq->link != &engine->timeline.requests)
> -		print_request(m, rq, "\t\tfirst  ");
> -
> -	rq = list_last_entry(&engine->timeline.requests,
> -			     struct i915_request, link);
> -	if (&rq->link != &engine->timeline.requests)
> -		print_request(m, rq, "\t\tlast   ");
> -
>  	rq = intel_engine_find_active_request(engine);
>  	if (rq) {
>  		print_request(m, rq, "\t\tactive ");
> @@ -1572,7 +1572,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  	if (!intel_engine_supports_stats(engine))
>  		return -ENODEV;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  	write_seqlock(&engine->stats.lock);
>  
>  	if (unlikely(engine->stats.enabled == ~0)) {
> @@ -1598,7 +1598,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  
>  unlock:
>  	write_sequnlock(&engine->stats.lock);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  
>  	return err;
>  }
> @@ -1683,22 +1683,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
>  	 * At all other times, we must assume the GPU is still running, but
>  	 * we only care about the snapshot of this moment.
>  	 */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	list_for_each_entry(request, &engine->timeline.requests, link) {
> +	spin_lock_irqsave(&engine->active.lock, flags);
> +	list_for_each_entry(request, &engine->active.requests, sched.link) {
>  		if (i915_request_completed(request))
>  			continue;
>  
>  		if (!i915_request_started(request))
> -			break;
> +			continue;
>  
>  		/* More than one preemptible request may match! */
>  		if (!match_ring(request))
> -			break;
> +			continue;
>  
>  		active = request;
>  		break;
>  	}
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  
>  	return active;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 33a31aa2d2ae..b2faca8e5dec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -288,7 +288,11 @@ struct intel_engine_cs {
>  
>  	struct intel_ring *buffer;
>  
> -	struct i915_timeline timeline;
> +	struct {
> +		spinlock_t lock;
> +		struct list_head requests;
> +	} active;
> +
>  	struct llist_head barrier_tasks;
>  
>  	struct intel_context *kernel_context; /* pinned */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 05524489615c..853376895505 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -298,8 +298,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>  	 * Check against the first request in ELSP[1], it will, thanks to the
>  	 * power of PI, be the highest priority of that context.
>  	 */
> -	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
> -	    rq_prio(list_next_entry(rq, link)) > last_prio)
> +	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
> +	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
>  		return true;
>  
>  	if (rb) {
> @@ -434,11 +434,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>  	struct list_head *uninitialized_var(pl);
>  	int prio = I915_PRIORITY_INVALID;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	list_for_each_entry_safe_reverse(rq, rn,
> -					 &engine->timeline.requests,
> -					 link) {
> +					 &engine->active.requests,
> +					 sched.link) {
>  		struct intel_engine_cs *owner;
>  
>  		if (i915_request_completed(rq))
> @@ -465,7 +465,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>  			}
>  			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>  
> -			list_add(&rq->sched.link, pl);
> +			list_move(&rq->sched.link, pl);
>  			active = rq;
>  		} else {
>  			rq->engine = owner;
> @@ -933,11 +933,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
>  		struct i915_request *rq;
>  
> -		spin_lock(&ve->base.timeline.lock);
> +		spin_lock(&ve->base.active.lock);
>  
>  		rq = ve->request;
>  		if (unlikely(!rq)) { /* lost the race to a sibling */
> -			spin_unlock(&ve->base.timeline.lock);
> +			spin_unlock(&ve->base.active.lock);
>  			rb_erase_cached(rb, &execlists->virtual);
>  			RB_CLEAR_NODE(rb);
>  			rb = rb_first_cached(&execlists->virtual);
> @@ -950,13 +950,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  
>  		if (rq_prio(rq) >= queue_prio(execlists)) {
>  			if (!virtual_matches(ve, rq, engine)) {
> -				spin_unlock(&ve->base.timeline.lock);
> +				spin_unlock(&ve->base.active.lock);
>  				rb = rb_next(rb);
>  				continue;
>  			}
>  
>  			if (last && !can_merge_rq(last, rq)) {
> -				spin_unlock(&ve->base.timeline.lock);
> +				spin_unlock(&ve->base.active.lock);
>  				return; /* leave this rq for another engine */
>  			}
>  
> @@ -1011,7 +1011,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			last = rq;
>  		}
>  
> -		spin_unlock(&ve->base.timeline.lock);
> +		spin_unlock(&ve->base.active.lock);
>  		break;
>  	}
>  
> @@ -1068,8 +1068,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  				GEM_BUG_ON(port_isset(port));
>  			}
>  
> -			list_del_init(&rq->sched.link);
> -
>  			__i915_request_submit(rq);
>  			trace_i915_request_in(rq, port_index(port, execlists));
>  
> @@ -1170,7 +1168,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	/*
>  	 * Note that csb_write, csb_status may be either in HWSP or mmio.
> @@ -1330,7 +1328,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  
>  static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>  {
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	process_csb(engine);
>  	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> @@ -1351,15 +1349,16 @@ static void execlists_submission_tasklet(unsigned long data)
>  		  !!intel_wakeref_active(&engine->wakeref),
>  		  engine->execlists.active);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  	__execlists_submission_tasklet(engine);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void queue_request(struct intel_engine_cs *engine,
>  			  struct i915_sched_node *node,
>  			  int prio)
>  {
> +	GEM_BUG_ON(!list_empty(&node->link));
>  	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
>  }
>  
> @@ -1390,7 +1389,7 @@ static void execlists_submit_request(struct i915_request *request)
>  	unsigned long flags;
>  
>  	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	queue_request(engine, &request->sched, rq_prio(request));
>  
> @@ -1399,7 +1398,7 @@ static void execlists_submit_request(struct i915_request *request)
>  
>  	submit_queue(engine, rq_prio(request));
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void __execlists_context_fini(struct intel_context *ce)
> @@ -2050,8 +2049,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>  	intel_engine_stop_cs(engine);
>  
>  	/* And flush any current direct submission. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static bool lrc_regs_ok(const struct i915_request *rq)
> @@ -2094,11 +2093,11 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
>  
>  static struct i915_request *active_request(struct i915_request *rq)
>  {
> -	const struct list_head * const list = &rq->engine->timeline.requests;
> +	const struct list_head * const list = &rq->engine->active.requests;
>  	const struct intel_context * const context = rq->hw_context;
>  	struct i915_request *active = NULL;
>  
> -	list_for_each_entry_from_reverse(rq, list, link) {
> +	list_for_each_entry_from_reverse(rq, list, sched.link) {
>  		if (i915_request_completed(rq))
>  			break;
>  
> @@ -2215,11 +2214,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
>  
>  	GEM_TRACE("%s\n", engine->name);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__execlists_reset(engine, stalled);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void nop_submission_tasklet(unsigned long data)
> @@ -2250,12 +2249,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  	 * submission's irq state, we also wish to remind ourselves that
>  	 * it is irq state.)
>  	 */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__execlists_reset(engine, true);
>  
>  	/* Mark all executing requests as skipped. */
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(rq))
>  			dma_fence_set_error(&rq->fence, -EIO);
>  
> @@ -2286,7 +2285,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  		rb_erase_cached(rb, &execlists->virtual);
>  		RB_CLEAR_NODE(rb);
>  
> -		spin_lock(&ve->base.timeline.lock);
> +		spin_lock(&ve->base.active.lock);
>  		if (ve->request) {
>  			ve->request->engine = engine;
>  			__i915_request_submit(ve->request);
> @@ -2295,7 +2294,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  			ve->base.execlists.queue_priority_hint = INT_MIN;
>  			ve->request = NULL;
>  		}
> -		spin_unlock(&ve->base.timeline.lock);
> +		spin_unlock(&ve->base.active.lock);
>  	}
>  
>  	/* Remaining _unready_ requests will be nop'ed when submitted */
> @@ -2307,7 +2306,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
>  	execlists->tasklet.func = nop_submission_tasklet;
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void execlists_reset_finish(struct intel_engine_cs *engine)
> @@ -3010,12 +3009,18 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>  	return ret;
>  }
>  
> +static struct list_head *virtual_queue(struct virtual_engine *ve)
> +{
> +	return &ve->base.execlists.default_priolist.requests[0];
> +}
> +
>  static void virtual_context_destroy(struct kref *kref)
>  {
>  	struct virtual_engine *ve =
>  		container_of(kref, typeof(*ve), context.ref);
>  	unsigned int n;
>  
> +	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
>  	GEM_BUG_ON(ve->request);
>  	GEM_BUG_ON(ve->context.inflight);
>  
> @@ -3026,13 +3031,13 @@ static void virtual_context_destroy(struct kref *kref)
>  		if (RB_EMPTY_NODE(node))
>  			continue;
>  
> -		spin_lock_irq(&sibling->timeline.lock);
> +		spin_lock_irq(&sibling->active.lock);
>  
>  		/* Detachment is lazily performed in the execlists tasklet */
>  		if (!RB_EMPTY_NODE(node))
>  			rb_erase_cached(node, &sibling->execlists.virtual);
>  
> -		spin_unlock_irq(&sibling->timeline.lock);
> +		spin_unlock_irq(&sibling->active.lock);
>  	}
>  	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
>  
> @@ -3040,8 +3045,6 @@ static void virtual_context_destroy(struct kref *kref)
>  		__execlists_context_fini(&ve->context);
>  
>  	kfree(ve->bonds);
> -
> -	i915_timeline_fini(&ve->base.timeline);
>  	kfree(ve);
>  }
>  
> @@ -3161,16 +3164,16 @@ static void virtual_submission_tasklet(unsigned long data)
>  
>  		if (unlikely(!(mask & sibling->mask))) {
>  			if (!RB_EMPTY_NODE(&node->rb)) {
> -				spin_lock(&sibling->timeline.lock);
> +				spin_lock(&sibling->active.lock);
>  				rb_erase_cached(&node->rb,
>  						&sibling->execlists.virtual);
>  				RB_CLEAR_NODE(&node->rb);
> -				spin_unlock(&sibling->timeline.lock);
> +				spin_unlock(&sibling->active.lock);
>  			}
>  			continue;
>  		}
>  
> -		spin_lock(&sibling->timeline.lock);
> +		spin_lock(&sibling->active.lock);
>  
>  		if (!RB_EMPTY_NODE(&node->rb)) {
>  			/*
> @@ -3214,7 +3217,7 @@ static void virtual_submission_tasklet(unsigned long data)
>  			tasklet_hi_schedule(&sibling->execlists.tasklet);
>  		}
>  
> -		spin_unlock(&sibling->timeline.lock);
> +		spin_unlock(&sibling->active.lock);
>  	}
>  	local_irq_enable();
>  }
> @@ -3231,9 +3234,13 @@ static void virtual_submit_request(struct i915_request *rq)
>  	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
>  
>  	GEM_BUG_ON(ve->request);
> +	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
> +
>  	ve->base.execlists.queue_priority_hint = rq_prio(rq);
>  	WRITE_ONCE(ve->request, rq);
>  
> +	list_move_tail(&rq->sched.link, virtual_queue(ve));
> +
>  	tasklet_schedule(&ve->base.execlists.tasklet);
>  }
>  
> @@ -3297,10 +3304,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>  
>  	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
>  
> -	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
> -	if (err)
> -		goto err_put;
> -	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
> +	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
>  
>  	intel_engine_init_execlists(&ve->base);
>  
> @@ -3311,6 +3315,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>  	ve->base.submit_request = virtual_submit_request;
>  	ve->base.bond_execute = virtual_bond_execute;
>  
> +	INIT_LIST_HEAD(virtual_queue(ve));
>  	ve->base.execlists.queue_priority_hint = INT_MIN;
>  	tasklet_init(&ve->base.execlists.tasklet,
>  		     virtual_submission_tasklet,
> @@ -3465,11 +3470,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>  	unsigned int count;
>  	struct rb_node *rb;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	last = NULL;
>  	count = 0;
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
>  		if (count++ < max - 1)
>  			show_request(m, rq, "\t\tE ");
>  		else
> @@ -3532,7 +3537,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>  		show_request(m, last, "\t\tV ");
>  	}
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  void intel_lr_context_reset(struct intel_engine_cs *engine,
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 60d24110af80..cf258ec38ba6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -49,12 +49,12 @@ static void engine_skip_context(struct i915_request *rq)
>  	struct intel_engine_cs *engine = rq->engine;
>  	struct i915_gem_context *hung_ctx = rq->gem_context;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	if (!i915_request_is_active(rq))
>  		return;
>  
> -	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
> +	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
>  		if (rq->gem_context == hung_ctx)
>  			i915_request_skip(rq, -EIO);
>  }
> @@ -130,7 +130,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
>  		  rq->fence.seqno,
>  		  yesno(guilty));
>  
> -	lockdep_assert_held(&rq->engine->timeline.lock);
> +	lockdep_assert_held(&rq->engine->active.lock);
>  	GEM_BUG_ON(i915_request_completed(rq));
>  
>  	if (guilty) {
> @@ -785,10 +785,10 @@ static void nop_submit_request(struct i915_request *request)
>  		  engine->name, request->fence.context, request->fence.seqno);
>  	dma_fence_set_error(&request->fence, -EIO);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  	__i915_request_submit(request);
>  	i915_request_mark_complete(request);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  
>  	intel_engine_queue_breadcrumbs(engine);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 7ab28b6f62a1..669aa036242d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -730,14 +730,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
>  
>  static void reset_ring(struct intel_engine_cs *engine, bool stalled)
>  {
> -	struct i915_timeline *tl = &engine->timeline;
>  	struct i915_request *pos, *rq;
>  	unsigned long flags;
>  	u32 head;
>  
>  	rq = NULL;
> -	spin_lock_irqsave(&tl->lock, flags);
> -	list_for_each_entry(pos, &tl->requests, link) {
> +	spin_lock_irqsave(&engine->active.lock, flags);
> +	list_for_each_entry(pos, &engine->active.requests, sched.link) {
>  		if (!i915_request_completed(pos)) {
>  			rq = pos;
>  			break;
> @@ -791,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
>  	}
>  	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
>  
> -	spin_unlock_irqrestore(&tl->lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void reset_finish(struct intel_engine_cs *engine)
> @@ -877,10 +876,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
>  	struct i915_request *request;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	/* Mark all submitted requests as skipped. */
> -	list_for_each_entry(request, &engine->timeline.requests, link) {
> +	list_for_each_entry(request, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(request))
>  			dma_fence_set_error(&request->fence, -EIO);
>  
> @@ -889,7 +888,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
>  
>  	/* Remaining _unready_ requests will be nop'ed when submitted */
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void i9xx_submit_request(struct i915_request *request)
> @@ -1267,8 +1266,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
>  
>  	GEM_BUG_ON(!is_power_of_2(size));
>  	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
> -	GEM_BUG_ON(timeline == &engine->timeline);
> -	lockdep_assert_held(&engine->i915->drm.struct_mutex);
>  
>  	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
>  	if (!ring)
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index b7675ef18523..00c666d3e652 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -229,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
>  	struct i915_request *request;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	/* Mark all submitted requests as skipped. */
> -	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
> +	list_for_each_entry(request, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(request))
>  			dma_fence_set_error(&request->fence, -EIO);
>  
>  		i915_request_mark_complete(request);
>  	}
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
> @@ -285,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine)
>  	struct drm_i915_private *i915 = engine->i915;
>  	int err;
>  
> +	intel_engine_init_active(engine, ENGINE_MOCK);
>  	intel_engine_init_breadcrumbs(engine);
>  	intel_engine_init_execlists(engine);
>  	intel_engine_init__pm(engine);
>  
> -	if (i915_timeline_init(i915, &engine->timeline, NULL))
> -		goto err_breadcrumbs;
> -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> -
>  	engine->kernel_context =
>  		i915_gem_context_get_engine(i915->kernel_context, engine->id);
>  	if (IS_ERR(engine->kernel_context))
> -		goto err_timeline;
> +		goto err_breadcrumbs;
>  
>  	err = intel_context_pin(engine->kernel_context);
>  	intel_context_put(engine->kernel_context);
>  	if (err)
> -		goto err_timeline;
> +		goto err_breadcrumbs;
>  
>  	return 0;
>  
> -err_timeline:
> -	i915_timeline_fini(&engine->timeline);
>  err_breadcrumbs:
>  	intel_engine_fini_breadcrumbs(engine);
>  	return -ENOMEM;
> @@ -340,7 +335,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
>  	intel_context_unpin(engine->kernel_context);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> -	i915_timeline_fini(&engine->timeline);
>  
>  	kfree(engine);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index dc026d5cd7a0..4cbee4c206bd 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1275,7 +1275,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
>  
>  	count = 0;
>  	request = first;
> -	list_for_each_entry_from(request, &engine->timeline.requests, link)
> +	list_for_each_entry_from(request, &engine->active.requests, sched.link)
>  		count++;
>  	if (!count)
>  		return;
> @@ -1288,7 +1288,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
>  
>  	count = 0;
>  	request = first;
> -	list_for_each_entry_from(request, &engine->timeline.requests, link) {
> +	list_for_each_entry_from(request,
> +				 &engine->active.requests, sched.link) {
>  		if (count >= ee->num_requests) {
>  			/*
>  			 * If the ring request list was changed in
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9c58ae6e4afb..6b0a4d9343a6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq)
>  
>  	local_irq_disable();
>  
> -	spin_lock(&rq->engine->timeline.lock);
> -	list_del(&rq->link);
> -	spin_unlock(&rq->engine->timeline.lock);
> +	spin_lock(&rq->engine->active.lock);
> +	list_del(&rq->sched.link);
> +	spin_unlock(&rq->engine->active.lock);
>  
>  	spin_lock(&rq->lock);
>  	i915_request_mark_complete(rq);
> @@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
>  	intel_context_unpin(rq->hw_context);
>  
>  	i915_request_remove_from_client(rq);
> +	list_del(&rq->link);
>  
>  	free_capture_list(rq);
>  	i915_sched_node_fini(&rq->sched);
> @@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq,
>  	return 0;
>  }
>  
> -static void move_to_timeline(struct i915_request *request,
> -			     struct i915_timeline *timeline)
> -{
> -	GEM_BUG_ON(request->timeline == &request->engine->timeline);
> -	lockdep_assert_held(&request->engine->timeline.lock);
> -
> -	spin_lock(&request->timeline->lock);
> -	list_move_tail(&request->link, &timeline->requests);
> -	spin_unlock(&request->timeline->lock);
> -}
> -
>  void __i915_request_submit(struct i915_request *request)
>  {
>  	struct intel_engine_cs *engine = request->engine;
>  
> -	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld, current %d\n",
>  		  engine->name,
>  		  request->fence.context, request->fence.seqno,
>  		  hwsp_seqno(request));
>  
>  	GEM_BUG_ON(!irqs_disabled());
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	if (i915_gem_context_is_banned(request->gem_context))
>  		i915_request_skip(request, -EIO);
> @@ -422,6 +412,8 @@ void __i915_request_submit(struct i915_request *request)
>  	/* We may be recursing from the signal callback of another i915 fence */
>  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>  
> +	list_move_tail(&request->sched.link, &engine->active.requests);
> +
>  	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
>  	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
>  
> @@ -437,9 +429,6 @@ void __i915_request_submit(struct i915_request *request)
>  	engine->emit_fini_breadcrumb(request,
>  				     request->ring->vaddr + request->postfix);
>  
> -	/* Transfer from per-context onto the global per-engine timeline */
> -	move_to_timeline(request, &engine->timeline);
> -
>  	engine->serial++;
>  
>  	trace_i915_request_execute(request);
> @@ -451,11 +440,11 @@ void i915_request_submit(struct i915_request *request)
>  	unsigned long flags;
>  
>  	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__i915_request_submit(request);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  void __i915_request_unsubmit(struct i915_request *request)
> @@ -468,7 +457,7 @@ void __i915_request_unsubmit(struct i915_request *request)
>  		  hwsp_seqno(request));
>  
>  	GEM_BUG_ON(!irqs_disabled());
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	/*
>  	 * Only unwind in reverse order, required so that the per-context list
> @@ -486,9 +475,6 @@ void __i915_request_unsubmit(struct i915_request *request)
>  
>  	spin_unlock(&request->lock);
>  
> -	/* Transfer back from the global per-engine timeline to per-context */
> -	move_to_timeline(request, request->timeline);
> -
>  	/* We've already spun, don't charge on resubmitting. */
>  	if (request->sched.semaphores && i915_request_started(request)) {
>  		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> @@ -510,11 +496,11 @@ void i915_request_unsubmit(struct i915_request *request)
>  	unsigned long flags;
>  
>  	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__i915_request_unsubmit(request);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static int __i915_sw_fence_call
> @@ -669,7 +655,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>  	rq->engine = ce->engine;
>  	rq->ring = ce->ring;
>  	rq->timeline = tl;
> -	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
>  	rq->hwsp_seqno = tl->hwsp_seqno;
>  	rq->hwsp_cacheline = tl->hwsp_cacheline;
>  	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
> @@ -1137,9 +1122,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>  							 0);
>  	}
>  
> -	spin_lock_irq(&timeline->lock);
>  	list_add_tail(&rq->link, &timeline->requests);
> -	spin_unlock_irq(&timeline->lock);
>  
>  	/*
>  	 * Make sure that no request gazumped us - if it was allocated after
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index c9f7d07991c8..edbbdfec24ab 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -217,7 +217,7 @@ struct i915_request {
>  
>  	bool waitboost;
>  
> -	/** engine->request_list entry for this request */
> +	/** timeline->request entry for this request */
>  	struct list_head link;
>  
>  	/** ring->request_list entry for this request */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 78ceb56d7801..2e9b38bdc33c 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -77,7 +77,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
>  	bool first = true;
>  	int idx, i;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  	assert_priolists(execlists);
>  
>  	/* buckets sorted from highest [in slot 0] to lowest priority */
> @@ -162,9 +162,9 @@ sched_lock_engine(const struct i915_sched_node *node,
>  	 * check that the rq still belongs to the newly locked engine.
>  	 */
>  	while (locked != (engine = READ_ONCE(rq->engine))) {
> -		spin_unlock(&locked->timeline.lock);
> +		spin_unlock(&locked->active.lock);
>  		memset(cache, 0, sizeof(*cache));
> -		spin_lock(&engine->timeline.lock);
> +		spin_lock(&engine->active.lock);
>  		locked = engine;
>  	}
>  
> @@ -189,7 +189,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
>  	 * tasklet, i.e. we have not change the priority queue
>  	 * sufficiently to oust the running context.
>  	 */
> -	if (inflight && !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
> +	if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))

Ok. Hmm yes. Unexpected but nothing to trip over.

>  		return;
>  
>  	tasklet_hi_schedule(&engine->execlists.tasklet);
> @@ -278,7 +278,7 @@ static void __i915_schedule(struct i915_sched_node *node,
>  
>  	memset(&cache, 0, sizeof(cache));
>  	engine = node_to_request(node)->engine;
> -	spin_lock(&engine->timeline.lock);
> +	spin_lock(&engine->active.lock);
>  
>  	/* Fifo and depth-first replacement ensure our deps execute before us */
>  	engine = sched_lock_engine(node, engine, &cache);
> @@ -287,7 +287,7 @@ static void __i915_schedule(struct i915_sched_node *node,
>  
>  		node = dep->signaler;
>  		engine = sched_lock_engine(node, engine, &cache);
> -		lockdep_assert_held(&engine->timeline.lock);
> +		lockdep_assert_held(&engine->active.lock);
>  
>  		/* Recheck after acquiring the engine->timeline.lock */
>  		if (prio <= node->attr.priority || node_signaled(node))
> @@ -296,14 +296,8 @@ static void __i915_schedule(struct i915_sched_node *node,
>  		GEM_BUG_ON(node_to_request(node)->engine != engine);
>  
>  		node->attr.priority = prio;
> -		if (!list_empty(&node->link)) {
> -			GEM_BUG_ON(intel_engine_is_virtual(engine));
> -			if (!cache.priolist)
> -				cache.priolist =
> -					i915_sched_lookup_priolist(engine,
> -								   prio);
> -			list_move_tail(&node->link, cache.priolist);
> -		} else {
> +
> +		if (list_empty(&node->link)) {
>  			/*
>  			 * If the request is not in the priolist queue because
>  			 * it is not yet runnable, then it doesn't contribute
> @@ -312,8 +306,16 @@ static void __i915_schedule(struct i915_sched_node *node,
>  			 * queue; but in that case we may still need to reorder
>  			 * the inflight requests.
>  			 */
> -			if (!i915_sw_fence_done(&node_to_request(node)->submit))
> -				continue;

It was a smooth ride until here. Where did this go?

> +			continue;
> +		}
> +
> +		if (!intel_engine_is_virtual(engine) &&
> +		    !i915_request_is_active(node_to_request(node))) {

Is this the replacement? But it is now inside the virtual check, which
was previously a GEM_BUG_ON.

*trips over*
-Mika

> +			if (!cache.priolist)
> +				cache.priolist =
> +					i915_sched_lookup_priolist(engine,
> +								   prio);
> +			list_move_tail(&node->link, cache.priolist);
>  		}
>  
>  		if (prio <= engine->execlists.queue_priority_hint)
> @@ -325,7 +327,7 @@ static void __i915_schedule(struct i915_sched_node *node,
>  		kick_submission(engine, prio);
>  	}
>  
> -	spin_unlock(&engine->timeline.lock);
> +	spin_unlock(&engine->active.lock);
>  }
>  
>  void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> @@ -439,8 +441,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>  {
>  	struct i915_dependency *dep, *tmp;
>  
> -	GEM_BUG_ON(!list_empty(&node->link));
> -
>  	spin_lock_irq(&schedule_lock);
>  
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 000e1a9b6750..c311ce9c6f9d 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -251,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
>  
>  	timeline->fence_context = dma_fence_context_alloc(1);
>  
> -	spin_lock_init(&timeline->lock);
>  	mutex_init(&timeline->mutex);
>  
>  	INIT_ACTIVE_REQUEST(&timeline->last_request);
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 27668a1a69a3..36e5e5a65155 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
>  		       struct i915_vma *hwsp);
>  void i915_timeline_fini(struct i915_timeline *tl);
>  
> -static inline void
> -i915_timeline_set_subclass(struct i915_timeline *timeline,
> -			   unsigned int subclass)
> -{
> -	lockdep_set_subclass(&timeline->lock, subclass);
> -
> -	/*
> -	 * Due to an interesting quirk in lockdep's internal debug tracking,
> -	 * after setting a subclass we must ensure the lock is used. Otherwise,
> -	 * nr_unused_locks is incremented once too often.
> -	 */
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	local_irq_disable();
> -	lock_map_acquire(&timeline->lock.dep_map);
> -	lock_map_release(&timeline->lock.dep_map);
> -	local_irq_enable();
> -#endif
> -}
> -
>  struct i915_timeline *
>  i915_timeline_create(struct drm_i915_private *i915,
>  		     struct i915_vma *global_hwsp);
> diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
> index 1688705f4a2b..fce5cb4f1090 100644
> --- a/drivers/gpu/drm/i915/i915_timeline_types.h
> +++ b/drivers/gpu/drm/i915/i915_timeline_types.h
> @@ -23,10 +23,6 @@ struct i915_timeline {
>  	u64 fence_context;
>  	u32 seqno;
>  
> -	spinlock_t lock;
> -#define TIMELINE_CLIENT 0 /* default subclass */
> -#define TIMELINE_ENGINE 1
> -#define TIMELINE_VIRTUAL 2
>  	struct mutex mutex; /* protects the flow of requests */
>  
>  	unsigned int pin_count;
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 89592ef778b8..928121f06054 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -740,7 +740,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>  	bool submit = false;
>  	struct rb_node *rb;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	if (port_isset(port)) {
>  		if (intel_engine_has_preemption(engine)) {
> @@ -822,7 +822,7 @@ static void guc_submission_tasklet(unsigned long data)
>  	struct i915_request *rq;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	rq = port_request(port);
>  	while (rq && i915_request_completed(rq)) {
> @@ -847,7 +847,7 @@ static void guc_submission_tasklet(unsigned long data)
>  	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
>  		guc_dequeue(engine);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void guc_reset_prepare(struct intel_engine_cs *engine)
> @@ -884,7 +884,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
>  	struct i915_request *rq;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	execlists_cancel_port_requests(execlists);
>  
> @@ -900,7 +900,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
>  	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
>  
>  out_unlock:
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void guc_cancel_requests(struct intel_engine_cs *engine)
> @@ -926,13 +926,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
>  	 * submission's irq state, we also wish to remind ourselves that
>  	 * it is irq state.)
>  	 */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	/* Cancel the requests on the HW and clear the ELSP tracker. */
>  	execlists_cancel_port_requests(execlists);
>  
>  	/* Mark all executing requests as skipped. */
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(rq))
>  			dma_fence_set_error(&rq->fence, -EIO);
>  
> @@ -961,7 +961,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
>  	execlists->queue = RB_ROOT_CACHED;
>  	GEM_BUG_ON(port_isset(execlists->port));
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void guc_reset_finish(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> index e084476469ef..65b52be23d42 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> @@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
>  	timeline->i915 = NULL;
>  	timeline->fence_context = context;
>  
> -	spin_lock_init(&timeline->lock);
>  	mutex_init(&timeline->mutex);
>  
>  	INIT_ACTIVE_REQUEST(&timeline->last_request);
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list
  2019-06-14 14:34   ` Mika Kuoppala
@ 2019-06-14 14:44     ` Chris Wilson
  0 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2019-06-14 14:44 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-06-14 15:34:21)
> > @@ -296,14 +296,8 @@ static void __i915_schedule(struct i915_sched_node *node,
> >               GEM_BUG_ON(node_to_request(node)->engine != engine);
> >  
> >               node->attr.priority = prio;
> > -             if (!list_empty(&node->link)) {
> > -                     GEM_BUG_ON(intel_engine_is_virtual(engine));
> > -                     if (!cache.priolist)
> > -                             cache.priolist =
> > -                                     i915_sched_lookup_priolist(engine,
> > -                                                                prio);
> > -                     list_move_tail(&node->link, cache.priolist);
> > -             } else {
> > +
> > +             if (list_empty(&node->link)) {
> >                       /*
> >                        * If the request is not in the priolist queue because
> >                        * it is not yet runnable, then it doesn't contribute
> > @@ -312,8 +306,16 @@ static void __i915_schedule(struct i915_sched_node *node,
> >                        * queue; but in that case we may still need to reorder
> >                        * the inflight requests.
> >                        */
> > -                     if (!i915_sw_fence_done(&node_to_request(node)->submit))
> > -                             continue;
> 
> It was a smooth ride until here. Where did this go?
> 
> > +                     continue;
> > +             }
> > +
> > +             if (!intel_engine_is_virtual(engine) &&
> > +                 !i915_request_is_active(node_to_request(node))) {
> 
> Is this the replacement? But it is now inside the virtual check, which
> was previously a GEM_BUG_ON.

Yes. It is because we are now reusing rq->sched.link, and keeping rq->link
solely as the link along rq->timeline->requests.
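
To make the split concrete, a tiny self-contained userspace sketch
(hypothetical names and a stand-alone list implementation, not the actual
i915 structs or locking): the request embeds two independent list heads,
so membership of the context timeline and of the engine's active list can
change independently.

/*
 * Sketch only: one object linked on two independent lists through two
 * embedded list heads, mirroring rq->link (timeline->requests) vs
 * rq->sched.link (engine->active.requests).
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct request {
	int seqno;
	struct list_head link;       /* stays on the context timeline */
	struct list_head sched_link; /* moved onto the engine active list */
};

int main(void)
{
	struct list_head timeline, active;
	struct request rq = { .seqno = 1 };

	list_init(&timeline);
	list_init(&active);

	list_add_tail(&rq.link, &timeline);      /* at request construction */
	list_add_tail(&rq.sched_link, &active);  /* at submission */

	printf("request %d is on both lists independently\n",
	       container_of(active.next, struct request, sched_link)->seqno);
	return 0;
}

The real code of course uses the kernel's struct list_head, and the
sched.link move happens under engine->active.lock in __i915_request_submit().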
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list
  2019-06-12  9:31 ` [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list Chris Wilson
  2019-06-14 14:34   ` Mika Kuoppala
@ 2019-06-14 15:50   ` Mika Kuoppala
  2019-06-14 15:58     ` Chris Wilson
  1 sibling, 1 reply; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-14 15:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> To continue the onslaught of removing the assumption of a global
> execution ordering, another casualty is the engine->timeline. Without an
> actual timeline to track, it is overkill and we can replace it with a
> much less grand plain list. We still need a list of requests inflight,
> for the simple purpose of finding inflight requests (for retiring,
> resetting, preemption etc).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine.h        |  6 ++
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 62 ++++++------
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  6 +-
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 95 ++++++++++---------
>  drivers/gpu/drm/i915/gt/intel_reset.c         | 10 +-
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 15 ++-
>  drivers/gpu/drm/i915/gt/mock_engine.c         | 18 ++--
>  drivers/gpu/drm/i915/i915_gpu_error.c         |  5 +-
>  drivers/gpu/drm/i915/i915_request.c           | 43 +++------
>  drivers/gpu/drm/i915/i915_request.h           |  2 +-
>  drivers/gpu/drm/i915/i915_scheduler.c         | 38 ++++----
>  drivers/gpu/drm/i915/i915_timeline.c          |  1 -
>  drivers/gpu/drm/i915/i915_timeline.h          | 19 ----
>  drivers/gpu/drm/i915/i915_timeline_types.h    |  4 -
>  drivers/gpu/drm/i915/intel_guc_submission.c   | 16 ++--
>  .../gpu/drm/i915/selftests/mock_timeline.c    |  1 -
>  16 files changed, 153 insertions(+), 188 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index b9fd88f21609..6be607e9c084 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -564,4 +564,10 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
>  
>  #endif
>  
> +void intel_engine_init_active(struct intel_engine_cs *engine,
> +			      unsigned int subclass);
> +#define ENGINE_PHYSICAL	0
> +#define ENGINE_MOCK	1
> +#define ENGINE_VIRTUAL	2
> +
>  #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 5a08036ae774..01f50cfd517c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -617,14 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>  	if (err)
>  		return err;
>  
> -	err = i915_timeline_init(engine->i915,
> -				 &engine->timeline,
> -				 engine->status_page.vma);
> -	if (err)
> -		goto err_hwsp;
> -
> -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> -
> +	intel_engine_init_active(engine, ENGINE_PHYSICAL);
>  	intel_engine_init_breadcrumbs(engine);
>  	intel_engine_init_execlists(engine);
>  	intel_engine_init_hangcheck(engine);
> @@ -637,10 +630,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>  		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
>  
>  	return 0;
> -
> -err_hwsp:
> -	cleanup_status_page(engine);
> -	return err;
>  }
>  
>  /**
> @@ -797,6 +786,27 @@ static int pin_context(struct i915_gem_context *ctx,
>  	return 0;
>  }
>  
> +void
> +intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
> +{
> +	INIT_LIST_HEAD(&engine->active.requests);
> +
> +	spin_lock_init(&engine->active.lock);
> +	lockdep_set_subclass(&engine->active.lock, subclass);
> +
> +	/*
> +	 * Due to an interesting quirk in lockdep's internal debug tracking,
> +	 * after setting a subclass we must ensure the lock is used. Otherwise,
> +	 * nr_unused_locks is incremented once too often.
> +	 */
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	local_irq_disable();
> +	lock_map_acquire(&engine->active.lock.dep_map);
> +	lock_map_release(&engine->active.lock.dep_map);
> +	local_irq_enable();
> +#endif
> +}
> +
>  /**
>   * intel_engines_init_common - initialize engine state which might require hw access
>   * @engine: Engine to initialize.
> @@ -860,6 +870,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>   */
>  void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  {
> +	GEM_BUG_ON(!list_empty(&engine->active.requests));
> +
>  	cleanup_status_page(engine);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> @@ -874,8 +886,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>  	intel_context_unpin(engine->kernel_context);
>  	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
>  
> -	i915_timeline_fini(&engine->timeline);
> -
>  	intel_wa_list_free(&engine->ctx_wa_list);
>  	intel_wa_list_free(&engine->wa_list);
>  	intel_wa_list_free(&engine->whitelist);
> @@ -1482,16 +1492,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  
>  	drm_printf(m, "\tRequests:\n");
>  
> -	rq = list_first_entry(&engine->timeline.requests,
> -			      struct i915_request, link);
> -	if (&rq->link != &engine->timeline.requests)
> -		print_request(m, rq, "\t\tfirst  ");
> -
> -	rq = list_last_entry(&engine->timeline.requests,
> -			     struct i915_request, link);
> -	if (&rq->link != &engine->timeline.requests)
> -		print_request(m, rq, "\t\tlast   ");
> -
>  	rq = intel_engine_find_active_request(engine);
>  	if (rq) {
>  		print_request(m, rq, "\t\tactive ");
> @@ -1572,7 +1572,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  	if (!intel_engine_supports_stats(engine))
>  		return -ENODEV;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  	write_seqlock(&engine->stats.lock);
>  
>  	if (unlikely(engine->stats.enabled == ~0)) {
> @@ -1598,7 +1598,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  
>  unlock:
>  	write_sequnlock(&engine->stats.lock);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  
>  	return err;
>  }
> @@ -1683,22 +1683,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
>  	 * At all other times, we must assume the GPU is still running, but
>  	 * we only care about the snapshot of this moment.
>  	 */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	list_for_each_entry(request, &engine->timeline.requests, link) {
> +	spin_lock_irqsave(&engine->active.lock, flags);
> +	list_for_each_entry(request, &engine->active.requests, sched.link) {
>  		if (i915_request_completed(request))
>  			continue;
>  
>  		if (!i915_request_started(request))
> -			break;
> +			continue;
>  
>  		/* More than one preemptible request may match! */
>  		if (!match_ring(request))
> -			break;
> +			continue;
>  
>  		active = request;
>  		break;
>  	}
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  
>  	return active;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 33a31aa2d2ae..b2faca8e5dec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -288,7 +288,11 @@ struct intel_engine_cs {
>  
>  	struct intel_ring *buffer;
>  
> -	struct i915_timeline timeline;
> +	struct {
> +		spinlock_t lock;
> +		struct list_head requests;
> +	} active;
> +
>  	struct llist_head barrier_tasks;
>  
>  	struct intel_context *kernel_context; /* pinned */
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 05524489615c..853376895505 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -298,8 +298,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>  	 * Check against the first request in ELSP[1], it will, thanks to the
>  	 * power of PI, be the highest priority of that context.
>  	 */
> -	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
> -	    rq_prio(list_next_entry(rq, link)) > last_prio)
> +	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
> +	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
>  		return true;
>  
>  	if (rb) {
> @@ -434,11 +434,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>  	struct list_head *uninitialized_var(pl);
>  	int prio = I915_PRIORITY_INVALID;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	list_for_each_entry_safe_reverse(rq, rn,
> -					 &engine->timeline.requests,
> -					 link) {
> +					 &engine->active.requests,
> +					 sched.link) {
>  		struct intel_engine_cs *owner;
>  
>  		if (i915_request_completed(rq))
> @@ -465,7 +465,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>  			}
>  			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>  
> -			list_add(&rq->sched.link, pl);
> +			list_move(&rq->sched.link, pl);
>  			active = rq;
>  		} else {
>  			rq->engine = owner;
> @@ -933,11 +933,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
>  		struct i915_request *rq;
>  
> -		spin_lock(&ve->base.timeline.lock);
> +		spin_lock(&ve->base.active.lock);
>  
>  		rq = ve->request;
>  		if (unlikely(!rq)) { /* lost the race to a sibling */
> -			spin_unlock(&ve->base.timeline.lock);
> +			spin_unlock(&ve->base.active.lock);
>  			rb_erase_cached(rb, &execlists->virtual);
>  			RB_CLEAR_NODE(rb);
>  			rb = rb_first_cached(&execlists->virtual);
> @@ -950,13 +950,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  
>  		if (rq_prio(rq) >= queue_prio(execlists)) {
>  			if (!virtual_matches(ve, rq, engine)) {
> -				spin_unlock(&ve->base.timeline.lock);
> +				spin_unlock(&ve->base.active.lock);
>  				rb = rb_next(rb);
>  				continue;
>  			}
>  
>  			if (last && !can_merge_rq(last, rq)) {
> -				spin_unlock(&ve->base.timeline.lock);
> +				spin_unlock(&ve->base.active.lock);
>  				return; /* leave this rq for another engine */
>  			}
>  
> @@ -1011,7 +1011,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			last = rq;
>  		}
>  
> -		spin_unlock(&ve->base.timeline.lock);
> +		spin_unlock(&ve->base.active.lock);
>  		break;
>  	}
>  
> @@ -1068,8 +1068,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  				GEM_BUG_ON(port_isset(port));
>  			}
>  
> -			list_del_init(&rq->sched.link);
> -
>  			__i915_request_submit(rq);
>  			trace_i915_request_in(rq, port_index(port, execlists));
>  
> @@ -1170,7 +1168,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	/*
>  	 * Note that csb_write, csb_status may be either in HWSP or mmio.
> @@ -1330,7 +1328,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  
>  static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>  {
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	process_csb(engine);
>  	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> @@ -1351,15 +1349,16 @@ static void execlists_submission_tasklet(unsigned long data)
>  		  !!intel_wakeref_active(&engine->wakeref),
>  		  engine->execlists.active);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  	__execlists_submission_tasklet(engine);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void queue_request(struct intel_engine_cs *engine,
>  			  struct i915_sched_node *node,
>  			  int prio)
>  {
> +	GEM_BUG_ON(!list_empty(&node->link));
>  	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
>  }
>  
> @@ -1390,7 +1389,7 @@ static void execlists_submit_request(struct i915_request *request)
>  	unsigned long flags;
>  
>  	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	queue_request(engine, &request->sched, rq_prio(request));
>  
> @@ -1399,7 +1398,7 @@ static void execlists_submit_request(struct i915_request *request)
>  
>  	submit_queue(engine, rq_prio(request));
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void __execlists_context_fini(struct intel_context *ce)
> @@ -2050,8 +2049,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>  	intel_engine_stop_cs(engine);
>  
>  	/* And flush any current direct submission. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static bool lrc_regs_ok(const struct i915_request *rq)
> @@ -2094,11 +2093,11 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
>  
>  static struct i915_request *active_request(struct i915_request *rq)
>  {
> -	const struct list_head * const list = &rq->engine->timeline.requests;
> +	const struct list_head * const list = &rq->engine->active.requests;
>  	const struct intel_context * const context = rq->hw_context;
>  	struct i915_request *active = NULL;
>  
> -	list_for_each_entry_from_reverse(rq, list, link) {
> +	list_for_each_entry_from_reverse(rq, list, sched.link) {
>  		if (i915_request_completed(rq))
>  			break;
>  
> @@ -2215,11 +2214,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
>  
>  	GEM_TRACE("%s\n", engine->name);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__execlists_reset(engine, stalled);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void nop_submission_tasklet(unsigned long data)
> @@ -2250,12 +2249,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  	 * submission's irq state, we also wish to remind ourselves that
>  	 * it is irq state.)
>  	 */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__execlists_reset(engine, true);
>  
>  	/* Mark all executing requests as skipped. */
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(rq))
>  			dma_fence_set_error(&rq->fence, -EIO);
>  
> @@ -2286,7 +2285,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  		rb_erase_cached(rb, &execlists->virtual);
>  		RB_CLEAR_NODE(rb);
>  
> -		spin_lock(&ve->base.timeline.lock);
> +		spin_lock(&ve->base.active.lock);
>  		if (ve->request) {
>  			ve->request->engine = engine;
>  			__i915_request_submit(ve->request);
> @@ -2295,7 +2294,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  			ve->base.execlists.queue_priority_hint = INT_MIN;
>  			ve->request = NULL;
>  		}
> -		spin_unlock(&ve->base.timeline.lock);
> +		spin_unlock(&ve->base.active.lock);
>  	}
>  
>  	/* Remaining _unready_ requests will be nop'ed when submitted */
> @@ -2307,7 +2306,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
>  	execlists->tasklet.func = nop_submission_tasklet;
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void execlists_reset_finish(struct intel_engine_cs *engine)
> @@ -3010,12 +3009,18 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>  	return ret;
>  }
>  
> +static struct list_head *virtual_queue(struct virtual_engine *ve)
> +{
> +	return &ve->base.execlists.default_priolist.requests[0];
> +}
> +
>  static void virtual_context_destroy(struct kref *kref)
>  {
>  	struct virtual_engine *ve =
>  		container_of(kref, typeof(*ve), context.ref);
>  	unsigned int n;
>  
> +	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
>  	GEM_BUG_ON(ve->request);
>  	GEM_BUG_ON(ve->context.inflight);
>  
> @@ -3026,13 +3031,13 @@ static void virtual_context_destroy(struct kref *kref)
>  		if (RB_EMPTY_NODE(node))
>  			continue;
>  
> -		spin_lock_irq(&sibling->timeline.lock);
> +		spin_lock_irq(&sibling->active.lock);
>  
>  		/* Detachment is lazily performed in the execlists tasklet */
>  		if (!RB_EMPTY_NODE(node))
>  			rb_erase_cached(node, &sibling->execlists.virtual);
>  
> -		spin_unlock_irq(&sibling->timeline.lock);
> +		spin_unlock_irq(&sibling->active.lock);
>  	}
>  	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
>  
> @@ -3040,8 +3045,6 @@ static void virtual_context_destroy(struct kref *kref)
>  		__execlists_context_fini(&ve->context);
>  
>  	kfree(ve->bonds);
> -
> -	i915_timeline_fini(&ve->base.timeline);
>  	kfree(ve);
>  }
>  
> @@ -3161,16 +3164,16 @@ static void virtual_submission_tasklet(unsigned long data)
>  
>  		if (unlikely(!(mask & sibling->mask))) {
>  			if (!RB_EMPTY_NODE(&node->rb)) {
> -				spin_lock(&sibling->timeline.lock);
> +				spin_lock(&sibling->active.lock);
>  				rb_erase_cached(&node->rb,
>  						&sibling->execlists.virtual);
>  				RB_CLEAR_NODE(&node->rb);
> -				spin_unlock(&sibling->timeline.lock);
> +				spin_unlock(&sibling->active.lock);
>  			}
>  			continue;
>  		}
>  
> -		spin_lock(&sibling->timeline.lock);
> +		spin_lock(&sibling->active.lock);
>  
>  		if (!RB_EMPTY_NODE(&node->rb)) {
>  			/*
> @@ -3214,7 +3217,7 @@ static void virtual_submission_tasklet(unsigned long data)
>  			tasklet_hi_schedule(&sibling->execlists.tasklet);
>  		}
>  
> -		spin_unlock(&sibling->timeline.lock);
> +		spin_unlock(&sibling->active.lock);
>  	}
>  	local_irq_enable();
>  }
> @@ -3231,9 +3234,13 @@ static void virtual_submit_request(struct i915_request *rq)
>  	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
>  
>  	GEM_BUG_ON(ve->request);
> +	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
> +
>  	ve->base.execlists.queue_priority_hint = rq_prio(rq);
>  	WRITE_ONCE(ve->request, rq);
>  
> +	list_move_tail(&rq->sched.link, virtual_queue(ve));
> +
>  	tasklet_schedule(&ve->base.execlists.tasklet);
>  }
>  
> @@ -3297,10 +3304,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>  
>  	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
>  
> -	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
> -	if (err)
> -		goto err_put;
> -	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
> +	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
>  
>  	intel_engine_init_execlists(&ve->base);
>  
> @@ -3311,6 +3315,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>  	ve->base.submit_request = virtual_submit_request;
>  	ve->base.bond_execute = virtual_bond_execute;
>  
> +	INIT_LIST_HEAD(virtual_queue(ve));
>  	ve->base.execlists.queue_priority_hint = INT_MIN;
>  	tasklet_init(&ve->base.execlists.tasklet,
>  		     virtual_submission_tasklet,
> @@ -3465,11 +3470,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>  	unsigned int count;
>  	struct rb_node *rb;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	last = NULL;
>  	count = 0;
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
>  		if (count++ < max - 1)
>  			show_request(m, rq, "\t\tE ");
>  		else
> @@ -3532,7 +3537,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>  		show_request(m, last, "\t\tV ");
>  	}
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  void intel_lr_context_reset(struct intel_engine_cs *engine,
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 60d24110af80..cf258ec38ba6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -49,12 +49,12 @@ static void engine_skip_context(struct i915_request *rq)
>  	struct intel_engine_cs *engine = rq->engine;
>  	struct i915_gem_context *hung_ctx = rq->gem_context;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	if (!i915_request_is_active(rq))
>  		return;
>  
> -	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
> +	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
>  		if (rq->gem_context == hung_ctx)
>  			i915_request_skip(rq, -EIO);
>  }
> @@ -130,7 +130,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
>  		  rq->fence.seqno,
>  		  yesno(guilty));
>  
> -	lockdep_assert_held(&rq->engine->timeline.lock);
> +	lockdep_assert_held(&rq->engine->active.lock);
>  	GEM_BUG_ON(i915_request_completed(rq));
>  
>  	if (guilty) {
> @@ -785,10 +785,10 @@ static void nop_submit_request(struct i915_request *request)
>  		  engine->name, request->fence.context, request->fence.seqno);
>  	dma_fence_set_error(&request->fence, -EIO);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  	__i915_request_submit(request);
>  	i915_request_mark_complete(request);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  
>  	intel_engine_queue_breadcrumbs(engine);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 7ab28b6f62a1..669aa036242d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -730,14 +730,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
>  
>  static void reset_ring(struct intel_engine_cs *engine, bool stalled)
>  {
> -	struct i915_timeline *tl = &engine->timeline;
>  	struct i915_request *pos, *rq;
>  	unsigned long flags;
>  	u32 head;
>  
>  	rq = NULL;
> -	spin_lock_irqsave(&tl->lock, flags);
> -	list_for_each_entry(pos, &tl->requests, link) {
> +	spin_lock_irqsave(&engine->active.lock, flags);
> +	list_for_each_entry(pos, &engine->active.requests, sched.link) {
>  		if (!i915_request_completed(pos)) {
>  			rq = pos;
>  			break;
> @@ -791,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
>  	}
>  	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
>  
> -	spin_unlock_irqrestore(&tl->lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void reset_finish(struct intel_engine_cs *engine)
> @@ -877,10 +876,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
>  	struct i915_request *request;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	/* Mark all submitted requests as skipped. */
> -	list_for_each_entry(request, &engine->timeline.requests, link) {
> +	list_for_each_entry(request, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(request))
>  			dma_fence_set_error(&request->fence, -EIO);
>  
> @@ -889,7 +888,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
>  
>  	/* Remaining _unready_ requests will be nop'ed when submitted */
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void i9xx_submit_request(struct i915_request *request)
> @@ -1267,8 +1266,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
>  
>  	GEM_BUG_ON(!is_power_of_2(size));
>  	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
> -	GEM_BUG_ON(timeline == &engine->timeline);
> -	lockdep_assert_held(&engine->i915->drm.struct_mutex);
>  
>  	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
>  	if (!ring)
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index b7675ef18523..00c666d3e652 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -229,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
>  	struct i915_request *request;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	/* Mark all submitted requests as skipped. */
> -	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
> +	list_for_each_entry(request, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(request))
>  			dma_fence_set_error(&request->fence, -EIO);
>  
>  		i915_request_mark_complete(request);
>  	}
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
> @@ -285,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine)
>  	struct drm_i915_private *i915 = engine->i915;
>  	int err;
>  
> +	intel_engine_init_active(engine, ENGINE_MOCK);
>  	intel_engine_init_breadcrumbs(engine);
>  	intel_engine_init_execlists(engine);
>  	intel_engine_init__pm(engine);
>  
> -	if (i915_timeline_init(i915, &engine->timeline, NULL))
> -		goto err_breadcrumbs;
> -	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
> -
>  	engine->kernel_context =
>  		i915_gem_context_get_engine(i915->kernel_context, engine->id);
>  	if (IS_ERR(engine->kernel_context))
> -		goto err_timeline;
> +		goto err_breadcrumbs;
>  
>  	err = intel_context_pin(engine->kernel_context);
>  	intel_context_put(engine->kernel_context);
>  	if (err)
> -		goto err_timeline;
> +		goto err_breadcrumbs;
>  
>  	return 0;
>  
> -err_timeline:
> -	i915_timeline_fini(&engine->timeline);
>  err_breadcrumbs:
>  	intel_engine_fini_breadcrumbs(engine);
>  	return -ENOMEM;
> @@ -340,7 +335,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
>  	intel_context_unpin(engine->kernel_context);
>  
>  	intel_engine_fini_breadcrumbs(engine);
> -	i915_timeline_fini(&engine->timeline);
>  
>  	kfree(engine);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index dc026d5cd7a0..4cbee4c206bd 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1275,7 +1275,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
>  
>  	count = 0;
>  	request = first;
> -	list_for_each_entry_from(request, &engine->timeline.requests, link)
> +	list_for_each_entry_from(request, &engine->active.requests, sched.link)
>  		count++;
>  	if (!count)
>  		return;
> @@ -1288,7 +1288,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
>  
>  	count = 0;
>  	request = first;
> -	list_for_each_entry_from(request, &engine->timeline.requests, link) {
> +	list_for_each_entry_from(request,
> +				 &engine->active.requests, sched.link) {
>  		if (count >= ee->num_requests) {
>  			/*
>  			 * If the ring request list was changed in
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9c58ae6e4afb..6b0a4d9343a6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -232,9 +232,9 @@ static bool i915_request_retire(struct i915_request *rq)
>  
>  	local_irq_disable();
>  
> -	spin_lock(&rq->engine->timeline.lock);
> -	list_del(&rq->link);
> -	spin_unlock(&rq->engine->timeline.lock);
> +	spin_lock(&rq->engine->active.lock);
> +	list_del(&rq->sched.link);
> +	spin_unlock(&rq->engine->active.lock);
>  
>  	spin_lock(&rq->lock);
>  	i915_request_mark_complete(rq);
> @@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
>  	intel_context_unpin(rq->hw_context);
>  
>  	i915_request_remove_from_client(rq);
> +	list_del(&rq->link);

This is now unguarded, and the timeline mutex is not here
to help either.

-Mika


>  
>  	free_capture_list(rq);
>  	i915_sched_node_fini(&rq->sched);
> @@ -373,28 +374,17 @@ __i915_request_await_execution(struct i915_request *rq,
>  	return 0;
>  }
>  
> -static void move_to_timeline(struct i915_request *request,
> -			     struct i915_timeline *timeline)
> -{
> -	GEM_BUG_ON(request->timeline == &request->engine->timeline);
> -	lockdep_assert_held(&request->engine->timeline.lock);
> -
> -	spin_lock(&request->timeline->lock);
> -	list_move_tail(&request->link, &timeline->requests);
> -	spin_unlock(&request->timeline->lock);
> -}
> -
>  void __i915_request_submit(struct i915_request *request)
>  {
>  	struct intel_engine_cs *engine = request->engine;
>  
> -	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld, current %d\n",
>  		  engine->name,
>  		  request->fence.context, request->fence.seqno,
>  		  hwsp_seqno(request));
>  
>  	GEM_BUG_ON(!irqs_disabled());
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	if (i915_gem_context_is_banned(request->gem_context))
>  		i915_request_skip(request, -EIO);
> @@ -422,6 +412,8 @@ void __i915_request_submit(struct i915_request *request)
>  	/* We may be recursing from the signal callback of another i915 fence */
>  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>  
> +	list_move_tail(&request->sched.link, &engine->active.requests);
> +
>  	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
>  	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
>  
> @@ -437,9 +429,6 @@ void __i915_request_submit(struct i915_request *request)
>  	engine->emit_fini_breadcrumb(request,
>  				     request->ring->vaddr + request->postfix);
>  
> -	/* Transfer from per-context onto the global per-engine timeline */
> -	move_to_timeline(request, &engine->timeline);
> -
>  	engine->serial++;
>  
>  	trace_i915_request_execute(request);
> @@ -451,11 +440,11 @@ void i915_request_submit(struct i915_request *request)
>  	unsigned long flags;
>  
>  	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__i915_request_submit(request);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  void __i915_request_unsubmit(struct i915_request *request)
> @@ -468,7 +457,7 @@ void __i915_request_unsubmit(struct i915_request *request)
>  		  hwsp_seqno(request));
>  
>  	GEM_BUG_ON(!irqs_disabled());
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	/*
>  	 * Only unwind in reverse order, required so that the per-context list
> @@ -486,9 +475,6 @@ void __i915_request_unsubmit(struct i915_request *request)
>  
>  	spin_unlock(&request->lock);
>  
> -	/* Transfer back from the global per-engine timeline to per-context */
> -	move_to_timeline(request, request->timeline);
> -
>  	/* We've already spun, don't charge on resubmitting. */
>  	if (request->sched.semaphores && i915_request_started(request)) {
>  		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> @@ -510,11 +496,11 @@ void i915_request_unsubmit(struct i915_request *request)
>  	unsigned long flags;
>  
>  	/* Will be called from irq-context when using foreign fences. */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	__i915_request_unsubmit(request);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static int __i915_sw_fence_call
> @@ -669,7 +655,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>  	rq->engine = ce->engine;
>  	rq->ring = ce->ring;
>  	rq->timeline = tl;
> -	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
>  	rq->hwsp_seqno = tl->hwsp_seqno;
>  	rq->hwsp_cacheline = tl->hwsp_cacheline;
>  	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
> @@ -1137,9 +1122,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>  							 0);
>  	}
>  
> -	spin_lock_irq(&timeline->lock);
>  	list_add_tail(&rq->link, &timeline->requests);
> -	spin_unlock_irq(&timeline->lock);
>  
>  	/*
>  	 * Make sure that no request gazumped us - if it was allocated after
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index c9f7d07991c8..edbbdfec24ab 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -217,7 +217,7 @@ struct i915_request {
>  
>  	bool waitboost;
>  
> -	/** engine->request_list entry for this request */
> +	/** timeline->request entry for this request */
>  	struct list_head link;
>  
>  	/** ring->request_list entry for this request */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 78ceb56d7801..2e9b38bdc33c 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -77,7 +77,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
>  	bool first = true;
>  	int idx, i;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  	assert_priolists(execlists);
>  
>  	/* buckets sorted from highest [in slot 0] to lowest priority */
> @@ -162,9 +162,9 @@ sched_lock_engine(const struct i915_sched_node *node,
>  	 * check that the rq still belongs to the newly locked engine.
>  	 */
>  	while (locked != (engine = READ_ONCE(rq->engine))) {
> -		spin_unlock(&locked->timeline.lock);
> +		spin_unlock(&locked->active.lock);
>  		memset(cache, 0, sizeof(*cache));
> -		spin_lock(&engine->timeline.lock);
> +		spin_lock(&engine->active.lock);
>  		locked = engine;
>  	}
>  
> @@ -189,7 +189,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
>  	 * tasklet, i.e. we have not change the priority queue
>  	 * sufficiently to oust the running context.
>  	 */
> -	if (inflight && !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
> +	if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
>  		return;
>  
>  	tasklet_hi_schedule(&engine->execlists.tasklet);
> @@ -278,7 +278,7 @@ static void __i915_schedule(struct i915_sched_node *node,
>  
>  	memset(&cache, 0, sizeof(cache));
>  	engine = node_to_request(node)->engine;
> -	spin_lock(&engine->timeline.lock);
> +	spin_lock(&engine->active.lock);
>  
>  	/* Fifo and depth-first replacement ensure our deps execute before us */
>  	engine = sched_lock_engine(node, engine, &cache);
> @@ -287,7 +287,7 @@ static void __i915_schedule(struct i915_sched_node *node,
>  
>  		node = dep->signaler;
>  		engine = sched_lock_engine(node, engine, &cache);
> -		lockdep_assert_held(&engine->timeline.lock);
> +		lockdep_assert_held(&engine->active.lock);
>  
>  		/* Recheck after acquiring the engine->timeline.lock */
>  		if (prio <= node->attr.priority || node_signaled(node))
> @@ -296,14 +296,8 @@ static void __i915_schedule(struct i915_sched_node *node,
>  		GEM_BUG_ON(node_to_request(node)->engine != engine);
>  
>  		node->attr.priority = prio;
> -		if (!list_empty(&node->link)) {
> -			GEM_BUG_ON(intel_engine_is_virtual(engine));
> -			if (!cache.priolist)
> -				cache.priolist =
> -					i915_sched_lookup_priolist(engine,
> -								   prio);
> -			list_move_tail(&node->link, cache.priolist);
> -		} else {
> +
> +		if (list_empty(&node->link)) {
>  			/*
>  			 * If the request is not in the priolist queue because
>  			 * it is not yet runnable, then it doesn't contribute
> @@ -312,8 +306,16 @@ static void __i915_schedule(struct i915_sched_node *node,
>  			 * queue; but in that case we may still need to reorder
>  			 * the inflight requests.
>  			 */
> -			if (!i915_sw_fence_done(&node_to_request(node)->submit))
> -				continue;
> +			continue;
> +		}
> +
> +		if (!intel_engine_is_virtual(engine) &&
> +		    !i915_request_is_active(node_to_request(node))) {
> +			if (!cache.priolist)
> +				cache.priolist =
> +					i915_sched_lookup_priolist(engine,
> +								   prio);
> +			list_move_tail(&node->link, cache.priolist);
>  		}
>  
>  		if (prio <= engine->execlists.queue_priority_hint)
> @@ -325,7 +327,7 @@ static void __i915_schedule(struct i915_sched_node *node,
>  		kick_submission(engine, prio);
>  	}
>  
> -	spin_unlock(&engine->timeline.lock);
> +	spin_unlock(&engine->active.lock);
>  }
>  
>  void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
> @@ -439,8 +441,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>  {
>  	struct i915_dependency *dep, *tmp;
>  
> -	GEM_BUG_ON(!list_empty(&node->link));
> -
>  	spin_lock_irq(&schedule_lock);
>  
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 000e1a9b6750..c311ce9c6f9d 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -251,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
>  
>  	timeline->fence_context = dma_fence_context_alloc(1);
>  
> -	spin_lock_init(&timeline->lock);
>  	mutex_init(&timeline->mutex);
>  
>  	INIT_ACTIVE_REQUEST(&timeline->last_request);
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 27668a1a69a3..36e5e5a65155 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
>  		       struct i915_vma *hwsp);
>  void i915_timeline_fini(struct i915_timeline *tl);
>  
> -static inline void
> -i915_timeline_set_subclass(struct i915_timeline *timeline,
> -			   unsigned int subclass)
> -{
> -	lockdep_set_subclass(&timeline->lock, subclass);
> -
> -	/*
> -	 * Due to an interesting quirk in lockdep's internal debug tracking,
> -	 * after setting a subclass we must ensure the lock is used. Otherwise,
> -	 * nr_unused_locks is incremented once too often.
> -	 */
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	local_irq_disable();
> -	lock_map_acquire(&timeline->lock.dep_map);
> -	lock_map_release(&timeline->lock.dep_map);
> -	local_irq_enable();
> -#endif
> -}
> -
>  struct i915_timeline *
>  i915_timeline_create(struct drm_i915_private *i915,
>  		     struct i915_vma *global_hwsp);
> diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
> index 1688705f4a2b..fce5cb4f1090 100644
> --- a/drivers/gpu/drm/i915/i915_timeline_types.h
> +++ b/drivers/gpu/drm/i915/i915_timeline_types.h
> @@ -23,10 +23,6 @@ struct i915_timeline {
>  	u64 fence_context;
>  	u32 seqno;
>  
> -	spinlock_t lock;
> -#define TIMELINE_CLIENT 0 /* default subclass */
> -#define TIMELINE_ENGINE 1
> -#define TIMELINE_VIRTUAL 2
>  	struct mutex mutex; /* protects the flow of requests */
>  
>  	unsigned int pin_count;
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 89592ef778b8..928121f06054 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -740,7 +740,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>  	bool submit = false;
>  	struct rb_node *rb;
>  
> -	lockdep_assert_held(&engine->timeline.lock);
> +	lockdep_assert_held(&engine->active.lock);
>  
>  	if (port_isset(port)) {
>  		if (intel_engine_has_preemption(engine)) {
> @@ -822,7 +822,7 @@ static void guc_submission_tasklet(unsigned long data)
>  	struct i915_request *rq;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	rq = port_request(port);
>  	while (rq && i915_request_completed(rq)) {
> @@ -847,7 +847,7 @@ static void guc_submission_tasklet(unsigned long data)
>  	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
>  		guc_dequeue(engine);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void guc_reset_prepare(struct intel_engine_cs *engine)
> @@ -884,7 +884,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
>  	struct i915_request *rq;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	execlists_cancel_port_requests(execlists);
>  
> @@ -900,7 +900,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
>  	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
>  
>  out_unlock:
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void guc_cancel_requests(struct intel_engine_cs *engine)
> @@ -926,13 +926,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
>  	 * submission's irq state, we also wish to remind ourselves that
>  	 * it is irq state.)
>  	 */
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
> +	spin_lock_irqsave(&engine->active.lock, flags);
>  
>  	/* Cancel the requests on the HW and clear the ELSP tracker. */
>  	execlists_cancel_port_requests(execlists);
>  
>  	/* Mark all executing requests as skipped. */
> -	list_for_each_entry(rq, &engine->timeline.requests, link) {
> +	list_for_each_entry(rq, &engine->active.requests, sched.link) {
>  		if (!i915_request_signaled(rq))
>  			dma_fence_set_error(&rq->fence, -EIO);
>  
> @@ -961,7 +961,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
>  	execlists->queue = RB_ROOT_CACHED;
>  	GEM_BUG_ON(port_isset(execlists->port));
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
>  static void guc_reset_finish(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> index e084476469ef..65b52be23d42 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> @@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
>  	timeline->i915 = NULL;
>  	timeline->fence_context = context;
>  
> -	spin_lock_init(&timeline->lock);
>  	mutex_init(&timeline->mutex);
>  
>  	INIT_ACTIVE_REQUEST(&timeline->last_request);
> -- 
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list
  2019-06-14 15:50   ` Mika Kuoppala
@ 2019-06-14 15:58     ` Chris Wilson
  2019-06-14 16:18       ` Mika Kuoppala
  0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2019-06-14 15:58 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-06-14 16:50:33)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > @@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
> >       intel_context_unpin(rq->hw_context);
> >  
> >       i915_request_remove_from_client(rq);
> > +     list_del(&rq->link);
> 
> This is now unguarded, and the timeline mutex is not here
> to help either.

It is guarded by the caller to i915_request_retire: we can only retire
under the same lock as we construct requests. Currently that is
struct_mutex, with the big switcheroo coming at the end.
-Chris
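
A minimal sketch of that guarantee (illustrative only; the function
retire_requests_example() and its arguments are not taken from the patch):
construction and retirement share one lock, currently struct_mutex, so the
plain list_del(&rq->link) above cannot race with the list_add_tail() done
when the request is added to its timeline.

	static void retire_requests_example(struct drm_i915_private *i915,
					    struct i915_request *rq)
	{
		/* Same mutex that request construction runs under. */
		lockdep_assert_held(&i915->drm.struct_mutex);

		if (i915_request_completed(rq))
			i915_request_retire(rq); /* unlocked list_del(&rq->link) is safe */
	}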
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list
  2019-06-14 15:58     ` Chris Wilson
@ 2019-06-14 16:18       ` Mika Kuoppala
  0 siblings, 0 replies; 31+ messages in thread
From: Mika Kuoppala @ 2019-06-14 16:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-06-14 16:50:33)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> > @@ -254,6 +254,7 @@ static bool i915_request_retire(struct i915_request *rq)
>> >       intel_context_unpin(rq->hw_context);
>> >  
>> >       i915_request_remove_from_client(rq);
>> > +     list_del(&rq->link);
>> 
>> This is now unguarded, and the timeline mutex is not here
>> to help either.
>
> It is guarded by the caller to i915_request_retire: we can only retire
> under the same lock as we construct requests. Currently that is
> struct_mutex, with the big switcheroo coming at the end.

Yup found it.
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2019-06-14 16:19 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-12  9:31 Endless busyness, the forecoming Chris Wilson
2019-06-12  9:31 ` [PATCH 1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
2019-06-12 13:29   ` Mika Kuoppala
2019-06-12 13:42     ` Chris Wilson
2019-06-12 14:09       ` Mika Kuoppala
2019-06-12 14:17         ` Chris Wilson
2019-06-12 14:26   ` [PATCH v2] " Chris Wilson
2019-06-14  9:22     ` Mika Kuoppala
2019-06-14  9:34       ` Chris Wilson
2019-06-14 10:18         ` Mika Kuoppala
2019-06-12  9:31 ` [PATCH 2/8] drm/i915: Stop retiring along engine Chris Wilson
2019-06-14 14:23   ` Mika Kuoppala
2019-06-12  9:31 ` [PATCH 3/8] drm/i915: Replace engine->timeline with a plain list Chris Wilson
2019-06-14 14:34   ` Mika Kuoppala
2019-06-14 14:44     ` Chris Wilson
2019-06-14 15:50   ` Mika Kuoppala
2019-06-14 15:58     ` Chris Wilson
2019-06-14 16:18       ` Mika Kuoppala
2019-06-12  9:31 ` [PATCH 4/8] drm/i915: Flush the execution-callbacks on retiring Chris Wilson
2019-06-12  9:31 ` [PATCH 5/8] drm/i915/execlists: Preempt-to-busy Chris Wilson
2019-06-12  9:31 ` [PATCH 6/8] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
2019-06-12  9:31 ` [PATCH 7/8] drm/i915/execlists: Force preemption Chris Wilson
2019-06-12  9:31 ` [PATCH 8/8] drm/i915: Add a label for config DRM_I915_SPIN_REQUEST Chris Wilson
2019-06-12  9:53 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Patchwork
2019-06-12  9:57 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-06-12 10:16 ` ✓ Fi.CI.BAT: success " Patchwork
2019-06-12 15:29 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2) Patchwork
2019-06-12 15:33 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-06-12 16:00 ` ✓ Fi.CI.BAT: success " Patchwork
2019-06-13  6:16 ` ✗ Fi.CI.IGT: failure for series starting with [1/8] drm/i915: Keep contexts pinned until after the next kernel context switch Patchwork
2019-06-14  9:58 ` ✗ Fi.CI.IGT: failure for series starting with [v2] drm/i915: Keep contexts pinned until after the next kernel context switch (rev2) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.