* [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

Currently there is an underlying assumption that i915_request_unsubmit()
is synchronous wrt the GPU -- that is, the request is no longer in flight
as we remove it. In the near future that may change, and this may upset
our signaling, as we may then process an interrupt for a request that is
no longer in flight.

CPU0					CPU1
intel_engine_breadcrumbs_irq
(queue request completion)
					i915_request_cancel_signaling
...					...
					i915_request_enable_signaling
dma_fence_signal

Hence in the time it took us to drop the lock to signal the request, a
preemption event may have occurred and re-queued the request. In the
process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
so reused the rq->signal_link that was in use on CPU0, leading to bad
pointer chasing in intel_engine_breadcrumbs_irq.

A related issue was that if someone started listening for a signal on a
completed but no longer in-flight request, we missed the opportunity to
immediately signal that request.

Furthermore, as intel_contexts may be immediately released during
request retirement, in order to be entirely sure that
intel_engine_breadcrumbs_irq may no longer dereference the intel_context
(ce->signals and ce->signal_link), we must serialise against the irq
spinlock.

In order to prevent the race, we use a bit in the fence.flags to signal
the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
quickly signals to any outside observer that the fence is indeed signaled.
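
In sketch form, the transfer inside intel_engine_breadcrumbs_irq now
becomes (simplified from the hunk below; locking and error paths as in
the patch):

	/* Under b->irq_lock: only one party wins the race to signal */
	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			     &rq->fence.flags))
		continue; /* already signaled elsewhere, e.g. on retire */

	i915_request_get(rq); /* hold rq while we drop the lock */
	list_add_tail(&rq->signal_link, &signal);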

Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/dma-buf/dma-fence.c              |  1 +
 drivers/gpu/drm/i915/i915_request.c      |  1 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 52 ++++++++++++++----------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 3aa8733f832a..9bf06042619a 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -29,6 +29,7 @@
 
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
 
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..e0efc334463b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -432,6 +432,7 @@ void __i915_request_submit(struct i915_request *request)
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 3cbffd400b1b..e19f84b006cc 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/kthread.h>
+#include <trace/events/dma_fence.h>
 #include <uapi/linux/sched/types.h>
 
 #include "i915_drv.h"
@@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	const ktime_t timestamp = ktime_get();
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
 	LIST_HEAD(signal);
@@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
 					     &rq->fence.flags));
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					     &rq->fence.flags))
+				continue;
 
 			/*
 			 * Queue for execution after dropping the signaling
@@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 			 * more signalers to the same context or engine.
 			 */
 			i915_request_get(rq);
-
-			/*
-			 * We may race with direct invocation of
-			 * dma_fence_signal(), e.g. i915_request_retire(),
-			 * so we need to acquire our reference to the request
-			 * before we cancel the breadcrumb.
-			 */
-			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 			list_add_tail(&rq->signal_link, &signal);
 		}
 
@@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 	list_for_each_safe(pos, next, &signal) {
 		struct i915_request *rq =
 			list_entry(pos, typeof(*rq), signal_link);
+		struct dma_fence_cb *cur, *tmp;
+
+		trace_dma_fence_signaled(&rq->fence);
+
+		rq->fence.timestamp = timestamp;
+		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
+
+		spin_lock(&rq->lock);
+		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
+			INIT_LIST_HEAD(&cur->node);
+			cur->func(&rq->fence, cur);
+		}
+		INIT_LIST_HEAD(&rq->fence.cb_list);
+		spin_unlock(&rq->lock);
 
-		dma_fence_signal(&rq->fence);
 		i915_request_put(rq);
 	}
 }
@@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
-	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
-
-	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-		return true;
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_irqs_disabled();
 
-	spin_lock(&b->irq_lock);
-	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
-	    !__request_completed(rq)) {
+	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 		struct intel_context *ce = rq->hw_context;
 		struct list_head *pos;
 
+		spin_lock(&b->irq_lock);
+		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
 		__intel_breadcrumbs_arm_irq(b);
 
 		/*
@@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 			list_move_tail(&ce->signal_link, &b->signalers);
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+		spin_unlock(&b->irq_lock);
 	}
-	spin_unlock(&b->irq_lock);
 
 	return !__request_completed(rq);
 }
@@ -294,8 +304,8 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
 	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
-		return;
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_irqs_disabled();
 
 	spin_lock(&b->irq_lock);
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-- 
2.20.1


* [PATCH 02/32] drm/i915: Verify workarounds immediately after application
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

Immediately after writing the workaround, verify that it stuck in the
register.
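
The check itself is a masked comparison: XOR exposes any bits that
differ from what we wrote, and the workaround's mask restricts the test
to the bits we own (see wa_verify() below):

	/* Did any bit covered by the workaround's mask fail to stick? */
	if ((cur ^ wa->val) & wa->mask)
		return false; /* lost; wa_verify() also logs the details */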

References: https://bugs.freedesktop.org/show_bug.cgi?id=108954
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_workarounds.c | 32 +++++++++++++-----------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index ccaf63679435..ea9292ee755a 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -913,6 +913,20 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 	return fw;
 }
 
+static bool
+wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
+{
+	if ((cur ^ wa->val) & wa->mask) {
+		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
+			  name, from, i915_mmio_reg_offset(wa->reg), cur,
+			  cur & wa->mask, wa->val, wa->mask);
+
+		return false;
+	}
+
+	return true;
+}
+
 static void
 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 {
@@ -931,6 +945,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+			wa_verify(wa,
+				  intel_uncore_read_fw(uncore, wa->reg),
+				  wal->name, "application");
 	}
 
 	intel_uncore_forcewake_put__locked(uncore, fw);
@@ -942,20 +960,6 @@ void intel_gt_apply_workarounds(struct drm_i915_private *i915)
 	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
 }
 
-static bool
-wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
-{
-	if ((cur ^ wa->val) & wa->mask) {
-		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
-			  name, from, i915_mmio_reg_offset(wa->reg), cur,
-			  cur & wa->mask, wa->val, wa->mask);
-
-		return false;
-	}
-
-	return true;
-}
-
 static bool wa_list_verify(struct intel_uncore *uncore,
 			   const struct i915_wa_list *wal,
 			   const char *from)
-- 
2.20.1


* [PATCH 03/32] drm/i915: Verify the engine workarounds stick on application
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

Read the engine workarounds back using the GPU after loading the initial
context state, to verify that we are setting them correctly, and bail if
verification fails.

v2: Break out the verification into its own loop
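
In outline, the verification path added below does the following
(heavily condensed from engine_wa_list_verify(); error handling
elided):

	vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
	rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
	wa_list_srm(rq, wal, vma);	/* one MI_STORE_REGISTER_MEM per reg */
	i915_request_add(rq);
	i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5);

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		if (!wa_verify(wa, results[i], wal->name, from))
			err = -ENXIO;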

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               |  21 +++
 drivers/gpu/drm/i915/intel_workarounds.c      | 120 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_workarounds.h      |   2 +
 .../drm/i915/selftests/intel_workarounds.c    |  53 +-------
 4 files changed, 149 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0a818a60ad31..a5412323fee1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4842,6 +4842,23 @@ static void i915_gem_fini_scratch(struct drm_i915_private *i915)
 	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
 }
 
+static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return 0;
+
+	for_each_engine(engine, i915, id) {
+		if (intel_engine_verify_workarounds(engine, "load"))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -4927,6 +4944,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 */
 	intel_init_clock_gating(dev_priv);
 
+	ret = intel_engines_verify_workarounds(dev_priv);
+	if (ret)
+		goto err_init_hw;
+
 	ret = __intel_engines_record_defaults(dev_priv);
 	if (ret)
 		goto err_init_hw;
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index ea9292ee755a..89e2c603e34b 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -1259,6 +1259,126 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
 	wa_list_apply(engine->uncore, &engine->wa_list);
 }
 
+static struct i915_vma *
+create_scratch(struct i915_address_space *vm, int count)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned int size;
+	int err;
+
+	size = round_up(count * sizeof(u32), PAGE_SIZE);
+	obj = i915_gem_object_create_internal(vm->i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0,
+			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
+	if (err)
+		goto err_obj;
+
+	return vma;
+
+err_obj:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static int
+wa_list_srm(struct i915_request *rq,
+	    const struct i915_wa_list *wal,
+	    struct i915_vma *vma)
+{
+	const struct i915_wa *wa;
+	unsigned int i;
+	u32 srm, *cs;
+
+	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	if (INTEL_GEN(rq->i915) >= 8)
+		srm++;
+
+	cs = intel_ring_begin(rq, 4 * wal->count);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+		*cs++ = srm;
+		*cs++ = i915_mmio_reg_offset(wa->reg);
+		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
+		*cs++ = 0;
+	}
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int engine_wa_list_verify(struct intel_engine_cs *engine,
+				 const struct i915_wa_list * const wal,
+				 const char *from)
+{
+	const struct i915_wa *wa;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	unsigned int i;
+	u32 *results;
+	int err;
+
+	if (!wal->count)
+		return 0;
+
+	vma = create_scratch(&engine->i915->ggtt.vm, wal->count);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_vma;
+	}
+
+	err = wa_list_srm(rq, wal, vma);
+	if (err)
+		goto err_vma;
+
+	i915_request_add(rq);
+	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_vma;
+	}
+
+	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(results)) {
+		err = PTR_ERR(results);
+		goto err_vma;
+	}
+
+	err = 0;
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+		if (!wa_verify(wa, results[i], wal->name, from))
+			err = -ENXIO;
+
+	i915_gem_object_unpin_map(vma->obj);
+
+err_vma:
+	i915_vma_unpin(vma);
+	i915_vma_put(vma);
+	return err;
+}
+
+int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+				    const char *from)
+{
+	return engine_wa_list_verify(engine, &engine->wa_list, from);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/intel_workarounds.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h
index 34eee5ec511e..fdf7ebb90f28 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -30,5 +30,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine);
 
 void intel_engine_init_workarounds(struct intel_engine_cs *engine);
 void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
+int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+				    const char *from);
 
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 567b6f8dae86..a363748a7a4f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -340,49 +340,6 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	return err;
 }
 
-static struct i915_vma *create_scratch(struct i915_gem_context *ctx)
-{
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	void *ptr;
-	int err;
-
-	obj = i915_gem_object_create_internal(ctx->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return ERR_CAST(obj);
-
-	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
-
-	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	if (IS_ERR(ptr)) {
-		err = PTR_ERR(ptr);
-		goto err_obj;
-	}
-	memset(ptr, 0xc5, PAGE_SIZE);
-	i915_gem_object_flush_map(obj);
-	i915_gem_object_unpin_map(obj);
-
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
-	if (IS_ERR(vma)) {
-		err = PTR_ERR(vma);
-		goto err_obj;
-	}
-
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
-		goto err_obj;
-
-	err = i915_gem_object_set_to_cpu_domain(obj, false);
-	if (err)
-		goto err_obj;
-
-	return vma;
-
-err_obj:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
 static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 {
 	struct drm_i915_gem_object *obj;
@@ -475,7 +432,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 	int err = 0, i, v;
 	u32 *cs, *results;
 
-	scratch = create_scratch(ctx);
+	scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1);
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
 
@@ -752,9 +709,11 @@ static bool verify_gt_engine_wa(struct drm_i915_private *i915,
 
 	ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str);
 
-	for_each_engine(engine, i915, id)
-		ok &= wa_list_verify(engine->uncore,
-				     &lists->engine[id].wa_list, str);
+	for_each_engine(engine, i915, id) {
+		ok &= engine_wa_list_verify(engine,
+					    &lists->engine[id].wa_list,
+					    str) == 0;
+	}
 
 	return ok;
 }
-- 
2.20.1


* [PATCH 04/32] drm/i915: Make workaround verification *optional*
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

Sometimes the HW doesn't even play fair, and completely forgets about
register writes. Skip verifying known troublemakers.
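
The mechanism is a new per-workaround readback mask: wa_verify() now
checks only the bits named in wa->read, and ignore_wa_write_or() leaves
that mask empty so the readback check becomes a no-op (sketch):

	/* Verification is gated on wa->read rather than wa->mask */
	if ((cur ^ wa->val) & wa->read)
		return false; /* lost */

	/* ignore_wa_write_or() sets .read = 0, so nothing is checked */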

References: https://bugs.freedesktop.org/show_bug.cgi?id=108954
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_workarounds.c      | 40 ++++++++++++++-----
 .../gpu/drm/i915/intel_workarounds_types.h    |  7 ++--
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 89e2c603e34b..b3cbed1ee1c9 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 			wal->wa_count++;
 			wa_->val |= wa->val;
 			wa_->mask |= wa->mask;
+			wa_->read |= wa->read;
 			return;
 		}
 	}
@@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
 		   u32 val)
 {
 	struct i915_wa wa = {
-		.reg = reg,
+		.reg  = reg,
 		.mask = mask,
-		.val = val
+		.val  = val,
+		.read = mask,
 	};
 
 	_wa_add(wal, &wa);
@@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 	wa_write_masked_or(wal, reg, val, val);
 }
 
+static void
+ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
+{
+	struct i915_wa wa = {
+		.reg  = reg,
+		.mask = mask,
+		.val  = val,
+		/* Bonkers HW, skip verifying */
+	};
+
+	_wa_add(wal, &wa);
+}
+
 #define WA_SET_BIT_MASKED(addr, mask) \
 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
 
@@ -916,10 +931,11 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 static bool
 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
 {
-	if ((cur ^ wa->val) & wa->mask) {
+	if ((cur ^ wa->val) & wa->read) {
 		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
-			  name, from, i915_mmio_reg_offset(wa->reg), cur,
-			  cur & wa->mask, wa->val, wa->mask);
+			  name, from, i915_mmio_reg_offset(wa->reg),
+			  cur, cur & wa->read,
+			  wa->val, wa->mask);
 
 		return false;
 	}
@@ -1122,9 +1138,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
 
 		/* WaPipelineFlushCoherentLines:icl */
-		wa_write_or(wal,
-			    GEN8_L3SQCREG4,
-			    GEN8_LQSC_FLUSH_COHERENT_LINES);
+		ignore_wa_write_or(wal,
+				   GEN8_L3SQCREG4,
+				   GEN8_LQSC_FLUSH_COHERENT_LINES,
+				   GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 		/*
 		 * Wa_1405543622:icl
@@ -1151,9 +1168,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * Wa_1405733216:icl
 		 * Formerly known as WaDisableCleanEvicts
 		 */
-		wa_write_or(wal,
-			    GEN8_L3SQCREG4,
-			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
+		ignore_wa_write_or(wal,
+				   GEN8_L3SQCREG4,
+				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
+				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
 
 		/* WaForwardProgressSoftReset:icl */
 		wa_write_or(wal,
diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h
index 30918da180ff..42ac1fb99572 100644
--- a/drivers/gpu/drm/i915/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/intel_workarounds_types.h
@@ -12,9 +12,10 @@
 #include "i915_reg.h"
 
 struct i915_wa {
-	i915_reg_t	  reg;
-	u32		  mask;
-	u32		  val;
+	i915_reg_t	reg;
+	u32		mask;
+	u32		val;
+	u32		read;
 };
 
 struct i915_wa_list {
-- 
2.20.1


* [PATCH 05/32] drm/i915/selftests: Verify whitelist of context registers
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

RING_NONPRIV allows us to add registers to a whitelist so that
userspace may modify them. Ideally such registers should be safe and
saved within the context, such that they do not impact system behaviour
for other users. This selftest verifies that the registers we do add
are (a) writable by userspace and (b) only affect a single client.
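
In outline, the new subtest runs (sketch using the client[]/scratch[]
naming from the patch; error handling elided):

	/* Read the default values through client 0 */
	read_whitelisted_registers(client[0].ctx, engine, client[0].scratch[0]);

	/* Dirty every whitelisted register, from client 0 only */
	scrub_whitelisted_registers(client[0].ctx, engine);

	/* Client 1 must still see the defaults... */
	read_whitelisted_registers(client[1].ctx, engine, client[1].scratch[0]);
	eq_whitelisted_registers(client[0].scratch[0], client[1].scratch[0], engine);

	/* ...while client 0 must see its own writes */
	read_whitelisted_registers(client[0].ctx, engine, client[0].scratch[1]);
	neq_whitelisted_registers(client[0].scratch[0], client[0].scratch[1], engine);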

Opens:
- Is GEN9_SLICE_COMMON_ECO_CHICKEN1 really write-only?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../drm/i915/selftests/intel_workarounds.c    | 322 ++++++++++++++++++
 1 file changed, 322 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index a363748a7a4f..6f941c31dcab 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -700,6 +700,327 @@ static int live_reset_whitelist(void *arg)
 	return err;
 }
 
+static int read_whitelisted_registers(struct i915_gem_context *ctx,
+				      struct intel_engine_cs *engine,
+				      struct i915_vma *results)
+{
+	intel_wakeref_t wakeref;
+	struct i915_request *rq;
+	u32 srm, *cs;
+	int err, i;
+
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto err_req;
+
+	srm = MI_STORE_REGISTER_MEM;
+	if (INTEL_GEN(ctx->i915) >= 8)
+		srm++;
+
+	cs = intel_ring_begin(rq, 4 * engine->whitelist.count);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_req;
+	}
+
+	for (i = 0; i < engine->whitelist.count; i++) {
+		u64 offset = results->node.start + sizeof(u32) * i;
+
+		*cs++ = srm;
+		*cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+		*cs++ = lower_32_bits(offset);
+		*cs++ = upper_32_bits(offset);
+	}
+	intel_ring_advance(rq, cs);
+
+err_req:
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
+		err = -EIO;
+
+	return err;
+}
+
+static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
+				       struct intel_engine_cs *engine)
+{
+	intel_wakeref_t wakeref;
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	int i, err;
+	u32 *cs;
+
+	batch = create_batch(ctx);
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
+
+	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_batch;
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count);
+	for (i = 0; i < engine->whitelist.count; i++) {
+		*cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+		*cs++ = 0xffffffff;
+	}
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(batch->obj);
+	i915_gem_chipset_flush(ctx->i915);
+
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(engine->i915, wakeref)
+		rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
+		err = engine->emit_init_breadcrumb(rq);
+		if (err)
+			goto err_request;
+	}
+
+	err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
+
+err_request:
+	i915_request_add(rq);
+	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
+		err = -EIO;
+
+err_unpin:
+	i915_gem_object_unpin_map(batch->obj);
+err_batch:
+	i915_vma_unpin_and_release(&batch, 0);
+	return err;
+}
+
+static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
+{
+	/* Alas, we must pardon some whitelists */
+	static const struct {
+		i915_reg_t reg;
+		unsigned long gen_mask;
+	} pardon[] = {
+		{ GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) },
+		{ GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) },
+	};
+	u32 offset = i915_mmio_reg_offset(reg);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pardon); i++) {
+		if (INTEL_INFO(i915)->gen_mask & pardon[i].gen_mask &&
+		    i915_mmio_reg_offset(pardon[i].reg) == offset)
+			return true;
+	}
+
+	return false;
+}
+
+static int eq_whitelisted_registers(struct i915_vma *A,
+				    struct i915_vma *B,
+				    struct intel_engine_cs *engine)
+{
+	u32 *a, *b;
+	int i, err;
+
+	a = i915_gem_object_pin_map(A->obj, I915_MAP_WB);
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
+	b = i915_gem_object_pin_map(B->obj, I915_MAP_WB);
+	if (IS_ERR(b)) {
+		err = PTR_ERR(b);
+		goto err_a;
+	}
+
+	err = 0;
+	for (i = 0; i < engine->whitelist.count; i++) {
+		if (a[i] != b[i] &&
+		    !pardon_reg(engine->i915, engine->whitelist.list[i].reg)) {
+			pr_err("[%d] Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n",
+			       i, i915_mmio_reg_offset(engine->whitelist.list[i].reg),
+			       a[i], b[i]);
+			err = -EINVAL;
+		}
+	}
+
+	i915_gem_object_unpin_map(B->obj);
+err_a:
+	i915_gem_object_unpin_map(A->obj);
+	return err;
+}
+
+static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg)
+{
+	static const struct {
+		i915_reg_t reg;
+		unsigned long gen_mask;
+	} wo[] = {
+		{ GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) },
+	};
+	u32 offset = i915_mmio_reg_offset(reg);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(wo); i++) {
+		if (INTEL_INFO(i915)->gen_mask & wo[i].gen_mask &&
+		    i915_mmio_reg_offset(wo[i].reg) == offset)
+			return true;
+	}
+
+	return false;
+}
+
+static int neq_whitelisted_registers(struct i915_vma *A,
+				     struct i915_vma *B,
+				     struct intel_engine_cs *engine)
+{
+	u32 *a, *b;
+	int i, err;
+
+	a = i915_gem_object_pin_map(A->obj, I915_MAP_WB);
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
+	b = i915_gem_object_pin_map(B->obj, I915_MAP_WB);
+	if (IS_ERR(b)) {
+		err = PTR_ERR(b);
+		goto err_a;
+	}
+
+	err = 0;
+	for (i = 0; i < engine->whitelist.count; i++) {
+		if (a[i] == b[i] &&
+		    !writeonly_reg(engine->i915, engine->whitelist.list[i].reg)) {
+			pr_err("[%d] Whitelist register 0x%4x:%08x was unwritable\n",
+			       i, i915_mmio_reg_offset(engine->whitelist.list[i].reg), a[i]);
+			err = -EINVAL;
+		}
+	}
+
+	i915_gem_object_unpin_map(B->obj);
+err_a:
+	i915_gem_object_unpin_map(A->obj);
+	return err;
+}
+
+static int live_isolated_whitelist(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct {
+		struct i915_gem_context *ctx;
+		struct i915_vma *scratch[2];
+	} client[2] = {};
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int i, err = 0;
+
+	/*
+	 * Check that a write into a whitelist register works, but
+	 * invisible to a second context.
+	 */
+
+	if (!intel_engines_has_context_isolation(i915))
+		return 0;
+
+	if (!i915->kernel_context->ppgtt)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
+		struct i915_gem_context *c;
+
+		c = kernel_context(i915);
+		if (IS_ERR(c)) {
+			err = PTR_ERR(c);
+			goto err;
+		}
+
+		client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024);
+		if (IS_ERR(client[i].scratch[0])) {
+			err = PTR_ERR(client[i].scratch[0]);
+			kernel_context_close(c);
+			goto err;
+		}
+
+		client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024);
+		if (IS_ERR(client[i].scratch[1])) {
+			err = PTR_ERR(client[i].scratch[1]);
+			i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+			kernel_context_close(c);
+			goto err;
+		}
+
+		client[i].ctx = c;
+	}
+
+	for_each_engine(engine, i915, id) {
+		if (!engine->whitelist.count)
+			continue;
+
+		/* Read default values */
+		err = read_whitelisted_registers(client[0].ctx, engine,
+						 client[0].scratch[0]);
+		if (err)
+			goto err;
+
+		/* Try to overwrite registers (should only affect ctx0) */
+		err = scrub_whitelisted_registers(client[0].ctx, engine);
+		if (err)
+			goto err;
+
+		/* Read values from ctx1, we expect these to be defaults */
+		err = read_whitelisted_registers(client[1].ctx, engine,
+						 client[1].scratch[0]);
+		if (err)
+			goto err;
+
+		/* Verify that both reads return the same default values */
+		err = eq_whitelisted_registers(client[0].scratch[0],
+					       client[1].scratch[0],
+					       engine);
+		if (err)
+			goto err;
+
+		/* Read back the updated values in ctx0 */
+		err = read_whitelisted_registers(client[0].ctx, engine,
+						 client[0].scratch[1]);
+		if (err)
+			goto err;
+
+		/* User should be granted privilege to overwrite regs */
+		err = neq_whitelisted_registers(client[0].scratch[0],
+						client[0].scratch[1],
+						engine);
+		if (err)
+			goto err;
+	}
+
+err:
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
+		if (!client[i].ctx)
+			break;
+
+		i915_vma_unpin_and_release(&client[i].scratch[1], 0);
+		i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+		kernel_context_close(client[i].ctx);
+	}
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	return err;
+}
+
 static bool verify_gt_engine_wa(struct drm_i915_private *i915,
 				struct wa_lists *lists, const char *str)
 {
@@ -844,6 +1165,7 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_dirty_whitelist),
 		SUBTEST(live_reset_whitelist),
+		SUBTEST(live_isolated_whitelist),
 		SUBTEST(live_gpu_reset_gt_engine_workarounds),
 		SUBTEST(live_engine_reset_gt_engine_workarounds),
 	};
-- 
2.20.1


* [PATCH 06/32] drm/i915: Store the default sseu setup on the engine
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

As we push for better compartmentalisation, it is more convenient to
copy the default sseu configuration from the engine into the derived
logical context than it is to dig it out from i915->runtime_info.
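
As a worked example of the trickiest rule in the moved code (the ICL
SScount handling in intel_sseu_make_rpcs(), quoted in full below):
requesting 1 slice with all 8 subslices exceeds the 4-subslice maximum
of the SScount field, so it is programmed as 2x4x8 and the hardware
translates that back to 1x8x8 (sketch, with the bound simplified to 4):

	/* ICL, req_sseu = 1 slice x 8 subslices */
	if (IS_GEN(i915, 11) && slices == 1 && subslices > 4) {
		subslice_pg = false;	/* SScount is ignored with >1 slice */
		slices *= 2;		/* program 2x4x8; HW runs it as 1x8x8 */
	}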

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/Makefile.header-test     |   1 +
 drivers/gpu/drm/i915/i915_drv.h               |  14 --
 drivers/gpu/drm/i915/i915_gem_context.c       |   2 +-
 drivers/gpu/drm/i915/i915_perf.c              |   2 +-
 drivers/gpu/drm/i915/intel_context.c          |   4 +-
 drivers/gpu/drm/i915/intel_context_types.h    |  11 +-
 drivers/gpu/drm/i915/intel_device_info.h      |  28 +---
 drivers/gpu/drm/i915/intel_engine_cs.c        |   4 +
 drivers/gpu/drm/i915/intel_engine_types.h     |   3 +
 drivers/gpu/drm/i915/intel_lrc.c              | 134 +----------------
 drivers/gpu/drm/i915/intel_lrc.h              |   2 -
 drivers/gpu/drm/i915/intel_sseu.c             | 142 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_sseu.h             |  67 +++++++++
 .../gpu/drm/i915/selftests/i915_gem_context.c |   5 +-
 15 files changed, 226 insertions(+), 194 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_sseu.c
 create mode 100644 drivers/gpu/drm/i915/intel_sseu.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index fbcb0904f4a8..53ff209b91bb 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -95,6 +95,7 @@ i915-y += \
 	  intel_lrc.o \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
+	  intel_sseu.o \
 	  intel_uncore.o \
 	  intel_wopcm.o
 
diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
index c1c391816fa7..5bcc78d7ac96 100644
--- a/drivers/gpu/drm/i915/Makefile.header-test
+++ b/drivers/gpu/drm/i915/Makefile.header-test
@@ -33,6 +33,7 @@ header_test := \
 	intel_psr.h \
 	intel_sdvo.h \
 	intel_sprite.h \
+	intel_sseu.h \
 	intel_tv.h \
 	intel_workarounds_types.h
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 35d0782c077e..7b5da9eddc1c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3387,20 +3387,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 	return (struct intel_device_info *)INTEL_INFO(dev_priv);
 }
 
-static inline struct intel_sseu
-intel_device_default_sseu(struct drm_i915_private *i915)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
-	struct intel_sseu value = {
-		.slice_mask = sseu->slice_mask,
-		.subslice_mask = sseu->subslice_mask[0],
-		.min_eus_per_subslice = sseu->max_eus_per_subslice,
-		.max_eus_per_subslice = sseu->max_eus_per_subslice,
-	};
-
-	return value;
-}
-
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
 extern int intel_modeset_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index dd728b26b5aa..c02a30612df9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1156,7 +1156,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 	*cs++ = lower_32_bits(offset);
 	*cs++ = upper_32_bits(offset);
-	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+	*cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
 
 	intel_ring_advance(rq, cs);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 39a4804091d7..56da457bed21 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1679,7 +1679,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 
 	CTX_REG(reg_state,
 		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-		gen8_make_rpcs(i915, &ce->sseu));
+		intel_sseu_make_rpcs(i915, &ce->sseu));
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..961d1445833d 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,15 +230,13 @@ intel_context_init(struct intel_context *ce,
 	ce->gem_context = ctx;
 	ce->engine = engine;
 	ce->ops = engine->cops;
+	ce->sseu = engine->sseu;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
 
 	mutex_init(&ce->pin_mutex);
 
-	/* Use the whole device by default */
-	ce->sseu = intel_device_default_sseu(ctx->i915);
-
 	i915_active_request_init(&ce->active_tracker,
 				 NULL, intel_context_retire);
 }
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 68b4ca1611e0..9ec4f787c908 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "intel_sseu.h"
 
 struct i915_gem_context;
 struct i915_vma;
@@ -28,16 +29,6 @@ struct intel_context_ops {
 	void (*destroy)(struct kref *kref);
 };
 
-/*
- * Powergating configuration for a particular (context,engine).
- */
-struct intel_sseu {
-	u8 slice_mask;
-	u8 subslice_mask;
-	u8 min_eus_per_subslice;
-	u8 max_eus_per_subslice;
-};
-
 struct intel_context {
 	struct kref ref;
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0e579f158016..3045e0dee2a1 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -29,6 +29,7 @@
 
 #include "intel_engine_types.h"
 #include "intel_display.h"
+#include "intel_sseu.h"
 
 struct drm_printer;
 struct drm_i915_private;
@@ -139,33 +140,6 @@ enum intel_ppgtt_type {
 	func(overlay_needs_physical); \
 	func(supports_tv);
 
-#define GEN_MAX_SLICES		(6) /* CNL upper bound */
-#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
-
-struct sseu_dev_info {
-	u8 slice_mask;
-	u8 subslice_mask[GEN_MAX_SLICES];
-	u16 eu_total;
-	u8 eu_per_subslice;
-	u8 min_eu_in_pool;
-	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
-	u8 subslice_7eu[3];
-	u8 has_slice_pg:1;
-	u8 has_subslice_pg:1;
-	u8 has_eu_pg:1;
-
-	/* Topology fields */
-	u8 max_slices;
-	u8 max_subslices;
-	u8 max_eus_per_subslice;
-
-	/* We don't have more than 8 eus per subslice at the moment and as we
-	 * store eus enabled using bits, no need to multiply by eus per
-	 * subslice.
-	 */
-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
-};
-
 struct intel_device_info {
 	u16 gen_mask;
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index eea9bec04f1b..ad2a683d97f7 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -588,6 +588,10 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_batch_pool(engine);
 	intel_engine_init_cmd_parser(engine);
 
+	/* Use the whole device by default */
+	engine->sseu =
+		intel_device_default_sseu(&RUNTIME_INFO(engine->i915)->sseu);
+
 	return 0;
 
 err_hwsp:
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 1f970c76b6a6..d07a01b3ed0b 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -17,6 +17,7 @@
 #include "i915_priolist_types.h"
 #include "i915_selftest.h"
 #include "i915_timeline_types.h"
+#include "intel_sseu.h"
 #include "intel_workarounds_types.h"
 
 #include "i915_gem_batch_pool.h"
@@ -278,6 +279,8 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	struct intel_sseu sseu;
+
 	struct intel_ring *buffer;
 
 	struct i915_timeline timeline;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4e0a351bfbca..18a9dc6ca877 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1232,7 +1232,7 @@ __execlists_update_reg_state(struct intel_context *ce,
 	/* RPCS */
 	if (engine->class == RENDER_CLASS)
 		regs[CTX_R_PWR_CLK_STATE + 1] =
-			gen8_make_rpcs(engine->i915, &ce->sseu);
+			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 }
 
 static int
@@ -2551,138 +2551,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
-u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
-	bool subslice_pg = sseu->has_subslice_pg;
-	struct intel_sseu ctx_sseu;
-	u8 slices, subslices;
-	u32 rpcs = 0;
-
-	/*
-	 * No explicit RPCS request is needed to ensure full
-	 * slice/subslice/EU enablement prior to Gen9.
-	*/
-	if (INTEL_GEN(i915) < 9)
-		return 0;
-
-	/*
-	 * If i915/perf is active, we want a stable powergating configuration
-	 * on the system.
-	 *
-	 * We could choose full enablement, but on ICL we know there are use
-	 * cases which disable slices for functional, apart for performance
-	 * reasons. So in this case we select a known stable subset.
-	 */
-	if (!i915->perf.oa.exclusive_stream) {
-		ctx_sseu = *req_sseu;
-	} else {
-		ctx_sseu = intel_device_default_sseu(i915);
-
-		if (IS_GEN(i915, 11)) {
-			/*
-			 * We only need subslice count so it doesn't matter
-			 * which ones we select - just turn off low bits in the
-			 * amount of half of all available subslices per slice.
-			 */
-			ctx_sseu.subslice_mask =
-				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
-			ctx_sseu.slice_mask = 0x1;
-		}
-	}
-
-	slices = hweight8(ctx_sseu.slice_mask);
-	subslices = hweight8(ctx_sseu.subslice_mask);
-
-	/*
-	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
-	 * wide and Icelake has up to eight subslices, specfial programming is
-	 * needed in order to correctly enable all subslices.
-	 *
-	 * According to documentation software must consider the configuration
-	 * as 2x4x8 and hardware will translate this to 1x8x8.
-	 *
-	 * Furthemore, even though SScount is three bits, maximum documented
-	 * value for it is four. From this some rules/restrictions follow:
-	 *
-	 * 1.
-	 * If enabled subslice count is greater than four, two whole slices must
-	 * be enabled instead.
-	 *
-	 * 2.
-	 * When more than one slice is enabled, hardware ignores the subslice
-	 * count altogether.
-	 *
-	 * From these restrictions it follows that it is not possible to enable
-	 * a count of subslices between the SScount maximum of four restriction,
-	 * and the maximum available number on a particular SKU. Either all
-	 * subslices are enabled, or a count between one and four on the first
-	 * slice.
-	 */
-	if (IS_GEN(i915, 11) &&
-	    slices == 1 &&
-	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
-		GEM_BUG_ON(subslices & 1);
-
-		subslice_pg = false;
-		slices *= 2;
-	}
-
-	/*
-	 * Starting in Gen9, render power gating can leave
-	 * slice/subslice/EU in a partially enabled state. We
-	 * must make an explicit request through RPCS for full
-	 * enablement.
-	*/
-	if (sseu->has_slice_pg) {
-		u32 mask, val = slices;
-
-		if (INTEL_GEN(i915) >= 11) {
-			mask = GEN11_RPCS_S_CNT_MASK;
-			val <<= GEN11_RPCS_S_CNT_SHIFT;
-		} else {
-			mask = GEN8_RPCS_S_CNT_MASK;
-			val <<= GEN8_RPCS_S_CNT_SHIFT;
-		}
-
-		GEM_BUG_ON(val & ~mask);
-		val &= mask;
-
-		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
-	}
-
-	if (subslice_pg) {
-		u32 val = subslices;
-
-		val <<= GEN8_RPCS_SS_CNT_SHIFT;
-
-		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
-		val &= GEN8_RPCS_SS_CNT_MASK;
-
-		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
-	}
-
-	if (sseu->has_eu_pg) {
-		u32 val;
-
-		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
-		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
-		val &= GEN8_RPCS_EU_MIN_MASK;
-
-		rpcs |= val;
-
-		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
-		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
-		val &= GEN8_RPCS_EU_MAX_MASK;
-
-		rpcs |= val;
-
-		rpcs |= GEN8_RPCS_ENABLE;
-	}
-
-	return rpcs;
-}
-
 static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 {
 	u32 indirect_ctx_offset;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 84aa230ea27b..99f75ee9d087 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -115,6 +115,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
-u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/intel_sseu.c
new file mode 100644
index 000000000000..cfc80813f662
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sseu.c
@@ -0,0 +1,142 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_lrc_reg.h"
+#include "intel_sseu.h"
+
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+			 const struct intel_sseu *req_sseu)
+{
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+	bool subslice_pg = sseu->has_subslice_pg;
+	struct intel_sseu ctx_sseu;
+	u8 slices, subslices;
+	u32 rpcs = 0;
+
+	/*
+	 * No explicit RPCS request is needed to ensure full
+	 * slice/subslice/EU enablement prior to Gen9.
+	 */
+	if (INTEL_GEN(i915) < 9)
+		return 0;
+
+	/*
+	 * If i915/perf is active, we want a stable powergating configuration
+	 * on the system.
+	 *
+	 * We could choose full enablement, but on ICL we know there are use
+	 * cases which disable slices for functional, apart for performance
+	 * reasons. So in this case we select a known stable subset.
+	 */
+	if (!i915->perf.oa.exclusive_stream) {
+		ctx_sseu = *req_sseu;
+	} else {
+		ctx_sseu = intel_device_default_sseu(sseu);
+
+		if (IS_GEN(i915, 11)) {
+			/*
+			 * We only need subslice count so it doesn't matter
+			 * which ones we select - just turn off low bits in the
+			 * amount of half of all available subslices per slice.
+			 */
+			ctx_sseu.subslice_mask =
+				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+			ctx_sseu.slice_mask = 0x1;
+		}
+	}
+
+	slices = hweight8(ctx_sseu.slice_mask);
+	subslices = hweight8(ctx_sseu.subslice_mask);
+
+	/*
+	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
+	 * wide and Icelake has up to eight subslices, special programming is
+	 * needed in order to correctly enable all subslices.
+	 *
+	 * According to documentation software must consider the configuration
+	 * as 2x4x8 and hardware will translate this to 1x8x8.
+	 *
+	 * Furthermore, even though SScount is three bits, maximum documented
+	 * value for it is four. From this some rules/restrictions follow:
+	 *
+	 * 1.
+	 * If enabled subslice count is greater than four, two whole slices must
+	 * be enabled instead.
+	 *
+	 * 2.
+	 * When more than one slice is enabled, hardware ignores the subslice
+	 * count altogether.
+	 *
+	 * From these restrictions it follows that it is not possible to enable
+	 * a count of subslices between the SScount maximum of four restriction,
+	 * and the maximum available number on a particular SKU. Either all
+	 * subslices are enabled, or a count between one and four on the first
+	 * slice.
+	 */
+	if (IS_GEN(i915, 11) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+		GEM_BUG_ON(subslices & 1);
+
+		subslice_pg = false;
+		slices *= 2;
+	}
+
+	/*
+	 * Starting in Gen9, render power gating can leave
+	 * slice/subslice/EU in a partially enabled state. We
+	 * must make an explicit request through RPCS for full
+	 * enablement.
+	 */
+	if (sseu->has_slice_pg) {
+		u32 mask, val = slices;
+
+		if (INTEL_GEN(i915) >= 11) {
+			mask = GEN11_RPCS_S_CNT_MASK;
+			val <<= GEN11_RPCS_S_CNT_SHIFT;
+		} else {
+			mask = GEN8_RPCS_S_CNT_MASK;
+			val <<= GEN8_RPCS_S_CNT_SHIFT;
+		}
+
+		GEM_BUG_ON(val & ~mask);
+		val &= mask;
+
+		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
+	}
+
+	if (subslice_pg) {
+		u32 val = subslices;
+
+		val <<= GEN8_RPCS_SS_CNT_SHIFT;
+
+		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
+		val &= GEN8_RPCS_SS_CNT_MASK;
+
+		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
+	}
+
+	if (sseu->has_eu_pg) {
+		u32 val;
+
+		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
+		val &= GEN8_RPCS_EU_MIN_MASK;
+
+		rpcs |= val;
+
+		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
+		val &= GEN8_RPCS_EU_MAX_MASK;
+
+		rpcs |= val;
+
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	return rpcs;
+}
diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/intel_sseu.h
new file mode 100644
index 000000000000..bf6fa019fd00
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sseu.h
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_SSEU_H__
+#define __INTEL_SSEU_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+
+#define GEN_MAX_SLICES		(6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
+
+struct sseu_dev_info {
+	u8 slice_mask;
+	u8 subslice_mask[GEN_MAX_SLICES];
+	u16 eu_total;
+	u8 eu_per_subslice;
+	u8 min_eu_in_pool;
+	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
+	u8 subslice_7eu[3];
+	u8 has_slice_pg:1;
+	u8 has_subslice_pg:1;
+	u8 has_eu_pg:1;
+
+	/* Topology fields */
+	u8 max_slices;
+	u8 max_subslices;
+	u8 max_eus_per_subslice;
+
+	/* We don't have more than 8 eus per subslice at the moment and as we
+	 * store eus enabled using bits, no need to multiply by eus per
+	 * subslice.
+	 */
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+};
+
+/*
+ * Powergating configuration for a particular (context,engine).
+ */
+struct intel_sseu {
+	u8 slice_mask;
+	u8 subslice_mask;
+	u8 min_eus_per_subslice;
+	u8 max_eus_per_subslice;
+};
+
+static inline struct intel_sseu
+intel_device_default_sseu(const struct sseu_dev_info *sseu)
+{
+	struct intel_sseu value = {
+		.slice_mask = sseu->slice_mask,
+		.subslice_mask = sseu->subslice_mask[0],
+		.min_eus_per_subslice = sseu->max_eus_per_subslice,
+		.max_eus_per_subslice = sseu->max_eus_per_subslice,
+	};
+
+	return value;
+}
+
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+			 const struct intel_sseu *req_sseu);
+
+#endif /* __INTEL_SSEU_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 4e1b6efc6b22..e1cb22f03e8e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -962,8 +962,7 @@ __sseu_finish(struct drm_i915_private *i915,
 	      unsigned int expected,
 	      struct igt_spinner *spin)
 {
-	unsigned int slices =
-		hweight32(intel_device_default_sseu(i915).slice_mask);
+	unsigned int slices = hweight32(engine->sseu.slice_mask);
 	u32 rpcs = 0;
 	int ret = 0;
 
@@ -1047,8 +1046,8 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	       const char *name,
 	       unsigned int flags)
 {
-	struct intel_sseu default_sseu = intel_device_default_sseu(i915);
 	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct intel_sseu default_sseu = engine->sseu;
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_context *ctx;
 	struct intel_sseu pg_sseu;
-- 
2.20.1


* [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/
From: Chris Wilson @ 2019-04-17  7:56 UTC
  To: intel-gfx

Start partitioning off the code that talks to the hardware (GT) from the
uapi layers, and move the device-facing code under gt/.

One casualty is s/intel_ringbuffer.h/intel_engine.h/, with the plan to
subdivide that header and body further (and to split out the submission
code from the ringbuffer and logical context handling). This patch aims
to be simple code motion, so that git can fix up in-flight patches with
little mess.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 | 46 ++++++++++++-------
 drivers/gpu/drm/i915/Makefile.header-test     |  6 +--
 drivers/gpu/drm/i915/gt/Makefile              |  2 +
 drivers/gpu/drm/i915/gt/Makefile.header-test  | 16 +++++++
 .../gpu/drm/i915/{ => gt}/intel_breadcrumbs.c |  0
 drivers/gpu/drm/i915/{ => gt}/intel_context.c |  3 +-
 drivers/gpu/drm/i915/{ => gt}/intel_context.h |  0
 .../drm/i915/{ => gt}/intel_context_types.h   |  0
 .../{intel_ringbuffer.h => gt/intel_engine.h} |  0
 .../gpu/drm/i915/{ => gt}/intel_engine_cs.c   |  8 ++--
 .../drm/i915/{ => gt}/intel_engine_types.h    |  5 +-
 .../drm/i915/{ => gt}/intel_gpu_commands.h    |  0
 .../gpu/drm/i915/{ => gt}/intel_hangcheck.c   |  4 +-
 drivers/gpu/drm/i915/{ => gt}/intel_lrc.c     |  5 +-
 drivers/gpu/drm/i915/{ => gt}/intel_lrc.h     |  4 +-
 drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h |  0
 drivers/gpu/drm/i915/{ => gt}/intel_mocs.c    |  4 +-
 drivers/gpu/drm/i915/{ => gt}/intel_mocs.h    |  4 +-
 .../i915/{i915_reset.c => gt/intel_reset.c}   |  2 +-
 .../i915/{i915_reset.h => gt/intel_reset.h}   |  2 +-
 .../gpu/drm/i915/{ => gt}/intel_ringbuffer.c  |  3 +-
 drivers/gpu/drm/i915/{ => gt}/intel_sseu.c    |  0
 drivers/gpu/drm/i915/{ => gt}/intel_sseu.h    |  0
 .../gpu/drm/i915/{ => gt}/intel_workarounds.c |  2 +-
 .../gpu/drm/i915/{ => gt}/intel_workarounds.h |  8 +++-
 .../i915/{ => gt}/intel_workarounds_types.h   |  0
 .../drm/i915/{selftests => gt}/mock_engine.c  | 10 ++--
 .../drm/i915/{selftests => gt}/mock_engine.h  |  2 +-
 .../selftest_engine_cs.c}                     |  0
 .../selftest_hangcheck.c}                     | 16 +++----
 .../intel_lrc.c => gt/selftest_lrc.c}         | 16 +++----
 .../selftest_workarounds.c}                   | 18 ++++----
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  3 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  3 +-
 drivers/gpu/drm/i915/i915_drv.c               |  5 +-
 drivers/gpu/drm/i915/i915_drv.h               |  7 +--
 drivers/gpu/drm/i915/i915_gem.c               |  7 +--
 drivers/gpu/drm/i915/i915_gem_context.c       |  7 ++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  3 +-
 drivers/gpu/drm/i915/i915_gem_context_types.h |  3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 -
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |  3 +-
 drivers/gpu/drm/i915/i915_perf.c              |  3 +-
 drivers/gpu/drm/i915/i915_pmu.c               |  4 +-
 drivers/gpu/drm/i915/i915_request.c           |  1 -
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  2 +-
 drivers/gpu/drm/i915/i915_trace.h             |  3 +-
 drivers/gpu/drm/i915/i915_vma.c               |  3 +-
 drivers/gpu/drm/i915/intel_device_info.h      |  6 ++-
 drivers/gpu/drm/i915/intel_display.c          |  1 -
 drivers/gpu/drm/i915/intel_guc_submission.c   |  3 +-
 drivers/gpu/drm/i915/intel_guc_submission.h   |  3 +-
 drivers/gpu/drm/i915/intel_uc.c               |  2 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |  5 +-
 drivers/gpu/drm/i915/selftests/igt_reset.c    |  3 +-
 drivers/gpu/drm/i915/selftests/igt_spinner.h  |  3 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
 drivers/gpu/drm/i915/selftests/mock_request.c |  3 +-
 59 files changed, 166 insertions(+), 112 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/Makefile
 create mode 100644 drivers/gpu/drm/i915/gt/Makefile.header-test
 rename drivers/gpu/drm/i915/{ => gt}/intel_breadcrumbs.c (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_context.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_context.h (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_context_types.h (100%)
 rename drivers/gpu/drm/i915/{intel_ringbuffer.h => gt/intel_engine.h} (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_engine_cs.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_engine_types.h (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_gpu_commands.h (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_hangcheck.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.h (98%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.h (97%)
 rename drivers/gpu/drm/i915/{i915_reset.c => gt/intel_reset.c} (99%)
 rename drivers/gpu/drm/i915/{i915_reset.h => gt/intel_reset.h} (98%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_ringbuffer.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.c (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.h (100%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.c (99%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.h (88%)
 rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds_types.h (100%)
 rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.c (97%)
 rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.h (98%)
 rename drivers/gpu/drm/i915/{selftests/intel_engine_cs.c => gt/selftest_engine_cs.c} (100%)
 rename drivers/gpu/drm/i915/{selftests/intel_hangcheck.c => gt/selftest_hangcheck.c} (99%)
 rename drivers/gpu/drm/i915/{selftests/intel_lrc.c => gt/selftest_lrc.c} (99%)
 rename drivers/gpu/drm/i915/{selftests/intel_workarounds.c => gt/selftest_workarounds.c} (98%)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 53ff209b91bb..40130cf5c003 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,32 +35,53 @@ subdir-ccflags-y += \
 # Extra header tests
 include $(src)/Makefile.header-test
 
+subdir-ccflags-y += -I$(src)
+
 # Please keep these build lists sorted!
 
 # core driver code
 i915-y += i915_drv.o \
 	  i915_irq.o \
-	  i915_memcpy.o \
-	  i915_mm.o \
 	  i915_params.o \
 	  i915_pci.o \
-	  i915_reset.o \
 	  i915_suspend.o \
-	  i915_sw_fence.o \
-	  i915_syncmap.o \
 	  i915_sysfs.o \
-	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
 	  intel_runtime_pm.o \
-	  intel_workarounds.o
+	  intel_uncore.o
+
+# core library code
+i915-y += \
+	i915_memcpy.o \
+	i915_mm.o \
+	i915_sw_fence.o \
+	i915_syncmap.o \
+	i915_user_extensions.o
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
-# GEM code
+# "Graphics Technology" (aka we talk to the gpu)
+obj-y += gt/
+gt-y += \
+	gt/intel_breadcrumbs.o \
+	gt/intel_context.o \
+	gt/intel_engine_cs.o \
+	gt/intel_hangcheck.o \
+	gt/intel_lrc.o \
+	gt/intel_reset.o \
+	gt/intel_ringbuffer.o \
+	gt/intel_mocs.o \
+	gt/intel_sseu.o \
+	gt/intel_workarounds.o
+gt-$(CONFIG_DRM_I915_SELFTEST) += \
+	gt/mock_engine.o
+i915-y += $(gt-y)
+
+# GEM (Graphics Execution Management) code
 i915-y += \
 	  i915_active.o \
 	  i915_cmd_parser.o \
@@ -88,15 +109,6 @@ i915-y += \
 	  i915_timeline.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
-	  intel_breadcrumbs.o \
-	  intel_context.o \
-	  intel_engine_cs.o \
-	  intel_hangcheck.o \
-	  intel_lrc.o \
-	  intel_mocs.o \
-	  intel_ringbuffer.o \
-	  intel_sseu.o \
-	  intel_uncore.o \
 	  intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
index 5bcc78d7ac96..96a5d90629ec 100644
--- a/drivers/gpu/drm/i915/Makefile.header-test
+++ b/drivers/gpu/drm/i915/Makefile.header-test
@@ -13,13 +13,11 @@ header_test := \
 	intel_cdclk.h \
 	intel_color.h \
 	intel_connector.h \
-	intel_context_types.h \
 	intel_crt.h \
 	intel_csr.h \
 	intel_ddi.h \
 	intel_dp.h \
 	intel_dvo.h \
-	intel_engine_types.h \
 	intel_fbc.h \
 	intel_fbdev.h \
 	intel_frontbuffer.h \
@@ -33,9 +31,7 @@ header_test := \
 	intel_psr.h \
 	intel_sdvo.h \
 	intel_sprite.h \
-	intel_sseu.h \
-	intel_tv.h \
-	intel_workarounds_types.h
+	intel_tv.h
 
 quiet_cmd_header_test = HDRTEST $@
       cmd_header_test = echo "\#include \"$(<F)\"" > $@
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
new file mode 100644
index 000000000000..1c75b5c9790c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/Makefile
@@ -0,0 +1,2 @@
+# Extra header tests
+include $(src)/Makefile.header-test
diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test
new file mode 100644
index 000000000000..61e06cbb4b32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/Makefile.header-test
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+header_test := $(notdir $(wildcard $(src)/*.h))
+
+quiet_cmd_header_test = HDRTEST $@
+      cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+header_test_%.c: %.h
+	$(call cmd,header_test)
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
similarity index 100%
rename from drivers/gpu/drm/i915/intel_breadcrumbs.c
rename to drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_context.c
rename to drivers/gpu/drm/i915/gt/intel_context.c
index 961d1445833d..ebd1e5919a4a 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -7,8 +7,9 @@
 #include "i915_drv.h"
 #include "i915_gem_context.h"
 #include "i915_globals.h"
+
 #include "intel_context.h"
-#include "intel_ringbuffer.h"
+#include "intel_engine.h"
 
 static struct i915_global_context {
 	struct i915_global base;
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_context.h
rename to drivers/gpu/drm/i915/gt/intel_context.h
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_context_types.h
rename to drivers/gpu/drm/i915/gt/intel_context_types.h
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_ringbuffer.h
rename to drivers/gpu/drm/i915/gt/intel_engine.h
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_engine_cs.c
rename to drivers/gpu/drm/i915/gt/intel_engine_cs.c
index ad2a683d97f7..21dd3f25e641 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -25,9 +25,10 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "i915_reset.h"
-#include "intel_ringbuffer.h"
+
+#include "intel_engine.h"
 #include "intel_lrc.h"
+#include "intel_reset.h"
 
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
@@ -1756,6 +1757,5 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_engine.c"
-#include "selftests/intel_engine_cs.c"
+#include "selftest_engine_cs.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
similarity index 99%
rename from drivers/gpu/drm/i915/intel_engine_types.h
rename to drivers/gpu/drm/i915/gt/intel_engine_types.h
index d07a01b3ed0b..3adf58da6d2c 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -14,15 +14,14 @@
 #include <linux/types.h>
 
 #include "i915_gem.h"
+#include "i915_gem_batch_pool.h"
+#include "i915_pmu.h"
 #include "i915_priolist_types.h"
 #include "i915_selftest.h"
 #include "i915_timeline_types.h"
 #include "intel_sseu.h"
 #include "intel_workarounds_types.h"
 
-#include "i915_gem_batch_pool.h"
-#include "i915_pmu.h"
-
 #define I915_MAX_SLICES	3
 #define I915_MAX_SUBSLICES 8
 
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_gpu_commands.h
rename to drivers/gpu/drm/i915/gt/intel_gpu_commands.h
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_hangcheck.c
rename to drivers/gpu/drm/i915/gt/intel_hangcheck.c
index 3d51ed1428d4..3053a706a561 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -22,8 +22,8 @@
  *
  */
 
+#include "intel_reset.h"
 #include "i915_drv.h"
-#include "i915_reset.h"
 
 struct hangcheck {
 	u64 acthd;
@@ -330,5 +330,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915)
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_hangcheck.c"
+#include "selftest_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_lrc.c
rename to drivers/gpu/drm/i915/gt/intel_lrc.c
index 18a9dc6ca877..5cadf8f6a23d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,13 +133,12 @@
  */
 #include <linux/interrupt.h>
 
-#include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
-#include "i915_reset.h"
 #include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_reset.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -2905,5 +2904,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_lrc.c"
+#include "selftest_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
similarity index 98%
rename from drivers/gpu/drm/i915/intel_lrc.h
rename to drivers/gpu/drm/i915/gt/intel_lrc.h
index 99f75ee9d087..1a33ec74af8c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -24,8 +24,7 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
-#include "intel_ringbuffer.h"
-#include "i915_gem_context.h"
+#include "intel_engine.h"
 
 /* Execlists regs */
 #define RING_ELSP(base)				_MMIO((base) + 0x230)
@@ -99,7 +98,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
 struct drm_printer;
 
 struct drm_i915_private;
-struct i915_gem_context;
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_lrc_reg.h
rename to drivers/gpu/drm/i915/gt/intel_lrc_reg.h
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_mocs.c
rename to drivers/gpu/drm/i915/gt/intel_mocs.c
index 274ba78500c0..79df66022d3a 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -20,9 +20,11 @@
  * SOFTWARE.
  */
 
+#include "i915_drv.h"
+
+#include "intel_engine.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
-#include "intel_ringbuffer.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
similarity index 97%
rename from drivers/gpu/drm/i915/intel_mocs.h
rename to drivers/gpu/drm/i915/gt/intel_mocs.h
index 3d99d1271b2b..0913704a1af2 100644
--- a/drivers/gpu/drm/i915/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,7 +49,9 @@
  * context handling keep the MOCS in step.
  */
 
-#include "i915_drv.h"
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
 
 int intel_rcs_context_init_mocs(struct i915_request *rq);
 void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
similarity index 99%
rename from drivers/gpu/drm/i915/i915_reset.c
rename to drivers/gpu/drm/i915/gt/intel_reset.c
index 677d59304e78..9731a2295639 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -9,7 +9,7 @@
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
-#include "i915_reset.h"
+#include "intel_reset.h"
 
 #include "intel_guc.h"
 
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
similarity index 98%
rename from drivers/gpu/drm/i915/i915_reset.h
rename to drivers/gpu/drm/i915/gt/intel_reset.h
index 3c0450289b8f..8e662bb43a9b 100644
--- a/drivers/gpu/drm/i915/i915_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -11,7 +11,7 @@
 #include <linux/types.h>
 #include <linux/srcu.h>
 
-#include "intel_engine_types.h"
+#include "gt/intel_engine_types.h"
 
 struct drm_i915_private;
 struct i915_request;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_ringbuffer.c
rename to drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 029fd8ec1857..c1214fd25702 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -33,9 +33,8 @@
 
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
-#include "intel_drv.h"
+#include "intel_reset.h"
 #include "intel_workarounds.h"
 
 /* Rough estimate of the typical request size, performing a flush,
diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
similarity index 100%
rename from drivers/gpu/drm/i915/intel_sseu.c
rename to drivers/gpu/drm/i915/gt/intel_sseu.c
diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_sseu.h
rename to drivers/gpu/drm/i915/gt/intel_sseu.h
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
similarity index 99%
rename from drivers/gpu/drm/i915/intel_workarounds.c
rename to drivers/gpu/drm/i915/gt/intel_workarounds.c
index b3cbed1ee1c9..f46ed0e2f07c 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1398,5 +1398,5 @@ int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_workarounds.c"
+#include "selftest_workarounds.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
similarity index 88%
rename from drivers/gpu/drm/i915/intel_workarounds.h
rename to drivers/gpu/drm/i915/gt/intel_workarounds.h
index fdf7ebb90f28..3761a6ee58bb 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
@@ -4,13 +4,17 @@
  * Copyright © 2014-2018 Intel Corporation
  */
 
-#ifndef _I915_WORKAROUNDS_H_
-#define _I915_WORKAROUNDS_H_
+#ifndef _INTEL_WORKAROUNDS_H_
+#define _INTEL_WORKAROUNDS_H_
 
 #include <linux/slab.h>
 
 #include "intel_workarounds_types.h"
 
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+
 static inline void intel_wa_list_free(struct i915_wa_list *wal)
 {
 	kfree(wal->list);
diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
similarity index 100%
rename from drivers/gpu/drm/i915/intel_workarounds_types.h
rename to drivers/gpu/drm/i915/gt/intel_workarounds_types.h
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
similarity index 97%
rename from drivers/gpu/drm/i915/selftests/mock_engine.c
rename to drivers/gpu/drm/i915/gt/mock_engine.c
index 61a8206ed677..414afd2f27fe 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -22,8 +22,11 @@
  *
  */
 
+#include "i915_drv.h"
+#include "intel_context.h"
+
 #include "mock_engine.h"
-#include "mock_request.h"
+#include "selftests/mock_request.h"
 
 struct mock_ring {
 	struct intel_ring base;
@@ -268,8 +271,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
-	if (pin_context(i915->kernel_context, &engine->base,
-			&engine->base.kernel_context))
+	engine->base.kernel_context =
+		intel_context_pin(i915->kernel_context, &engine->base);
+	if (IS_ERR(engine->base.kernel_context))
 		goto err_breadcrumbs;
 
 	return &engine->base;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
similarity index 98%
rename from drivers/gpu/drm/i915/selftests/mock_engine.h
rename to drivers/gpu/drm/i915/gt/mock_engine.h
index b9cc3a245f16..44b35a85e9d1 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.h
+++ b/drivers/gpu/drm/i915/gt/mock_engine.h
@@ -29,7 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 
-#include "../intel_ringbuffer.h"
+#include "gt/intel_engine.h"
 
 struct mock_engine {
 	struct intel_engine_cs base;
diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
similarity index 100%
rename from drivers/gpu/drm/i915/selftests/intel_engine_cs.c
rename to drivers/gpu/drm/i915/gt/selftest_engine_cs.c
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
similarity index 99%
rename from drivers/gpu/drm/i915/selftests/intel_hangcheck.c
rename to drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 050bd1e19e02..87c26920212f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -24,14 +24,14 @@
 
 #include <linux/kthread.h>
 
-#include "../i915_selftest.h"
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_reset.h"
-#include "igt_wedge_me.h"
-
-#include "mock_context.h"
-#include "mock_drm.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_wedge_me.h"
+
+#include "selftests/mock_context.h"
+#include "selftests/mock_drm.h"
 
 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
similarity index 99%
rename from drivers/gpu/drm/i915/selftests/intel_lrc.c
rename to drivers/gpu/drm/i915/gt/selftest_lrc.c
index fbee030db940..cd0551f97c2f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -6,15 +6,13 @@
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_reset.h"
-
-#include "../i915_selftest.h"
-#include "igt_flush_test.h"
-#include "igt_live_test.h"
-#include "igt_spinner.h"
-#include "i915_random.h"
-
-#include "mock_context.h"
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
 {
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
similarity index 98%
rename from drivers/gpu/drm/i915/selftests/intel_workarounds.c
rename to drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 6f941c31dcab..96c6282f3a10 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -4,15 +4,15 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_selftest.h"
-#include "../i915_reset.h"
-
-#include "igt_flush_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
-#include "igt_wedge_me.h"
-#include "mock_context.h"
-#include "mock_drm.h"
+#include "i915_selftest.h"
+#include "intel_reset.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_wedge_me.h"
+#include "selftests/mock_context.h"
+#include "selftests/mock_drm.h"
 
 static const struct wo_register {
 	enum intel_platform platform;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..e9fadcb4d592 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -25,8 +25,9 @@
  *
  */
 
+#include "gt/intel_engine.h"
+
 #include "i915_drv.h"
-#include "intel_ringbuffer.h"
 
 /**
  * DOC: batch buffer command parser
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5823ffb17821..3f039758b152 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,7 +32,8 @@
 #include <drm/drm_debugfs.h>
 #include <drm/drm_fourcc.h>
 
-#include "i915_reset.h"
+#include "gt/intel_reset.h"
+
 #include "intel_dp.h"
 #include "intel_drv.h"
 #include "intel_fbc.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1ad88e6d7c04..98b997526daa 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,10 +47,12 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gt/intel_workarounds.h"
+#include "gt/intel_reset.h"
+
 #include "i915_drv.h"
 #include "i915_pmu.h"
 #include "i915_query.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "intel_audio.h"
@@ -62,7 +64,6 @@
 #include "intel_pm.h"
 #include "intel_sprite.h"
 #include "intel_uc.h"
-#include "intel_workarounds.h"
 
 static struct drm_driver driver;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7b5da9eddc1c..fad5306f07da 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -62,18 +62,19 @@
 #include "i915_reg.h"
 #include "i915_utils.h"
 
+#include "gt/intel_lrc.h"
+#include "gt/intel_engine.h"
+#include "gt/intel_workarounds.h"
+
 #include "intel_bios.h"
 #include "intel_device_info.h"
 #include "intel_display.h"
 #include "intel_dpll_mgr.h"
 #include "intel_frontbuffer.h"
-#include "intel_lrc.h"
 #include "intel_opregion.h"
-#include "intel_ringbuffer.h"
 #include "intel_uc.h"
 #include "intel_uncore.h"
 #include "intel_wopcm.h"
-#include "intel_workarounds.h"
 
 #include "i915_gem.h"
 #include "i915_gem_context.h"
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a5412323fee1..9554960977a3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,19 +39,20 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gt/intel_mocs.h"
+#include "gt/intel_reset.h"
+#include "gt/intel_workarounds.h"
+
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gemfs.h"
 #include "i915_globals.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
-#include "intel_mocs.h"
 #include "intel_pm.h"
-#include "intel_workarounds.h"
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index c02a30612df9..37dff694456c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,13 +86,16 @@
  */
 
 #include <linux/log2.h>
+
 #include <drm/i915_drm.h>
+
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_workarounds.h"
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "intel_lrc_reg.h"
-#include "intel_workarounds.h"
 
 #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
 #define I915_CONTEXT_PARAM_VM 0x9
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 23dcb01bfd82..cec278ab04e2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -27,9 +27,10 @@
 
 #include "i915_gem_context_types.h"
 
+#include "gt/intel_context.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
-#include "intel_context.h"
 #include "intel_device_info.h"
 
 struct drm_device;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index e2ec58b10fb2..d282a6ab3b9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -17,8 +17,9 @@
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 
+#include "gt/intel_context_types.h"
+
 #include "i915_scheduler.h"
-#include "intel_context_types.h"
 
 struct pid;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8f460cc4cc1f..aab778728ea2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -37,7 +37,6 @@
 
 #include "i915_drv.h"
 #include "i915_vgpu.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index f597f35b109b..c8d96e91f3dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,8 +38,8 @@
 #include <linux/mm.h>
 #include <linux/pagevec.h>
 
+#include "gt/intel_reset.h"
 #include "i915_request.h"
-#include "i915_reset.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5dc761e85d9d..b419d0f59275 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -13,8 +13,9 @@
 
 #include <drm/drm_mm.h>
 
+#include "gt/intel_engine.h"
+
 #include "intel_device_info.h"
-#include "intel_ringbuffer.h"
 #include "intel_uc_fw.h"
 
 #include "i915_gem.h"
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 56da457bed21..a87f790335c1 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -195,6 +195,8 @@
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
+#include "gt/intel_lrc_reg.h"
+
 #include "i915_drv.h"
 #include "i915_oa_hsw.h"
 #include "i915_oa_bdw.h"
@@ -210,7 +212,6 @@
 #include "i915_oa_cflgt3.h"
 #include "i915_oa_cnl.h"
 #include "i915_oa_icl.h"
-#include "intel_lrc_reg.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 46a52da3db29..35e502481f29 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -6,8 +6,10 @@
 
 #include <linux/irq.h>
 #include <linux/pm_runtime.h>
+
+#include "gt/intel_engine.h"
+
 #include "i915_pmu.h"
-#include "intel_ringbuffer.h"
 #include "i915_drv.h"
 
 /* Frequency for the sampling timer for events which need it. */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e0efc334463b..74ae698c1f95 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -32,7 +32,6 @@
 #include "i915_active.h"
 #include "i915_drv.h"
 #include "i915_globals.h"
-#include "i915_reset.h"
 #include "intel_pm.h"
 
 struct execute_cb {
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index f1af3916a808..166a457884b2 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -9,8 +9,8 @@
 
 #include <linux/list.h>
 
+#include "gt/intel_engine_types.h"
 #include "i915_priolist_types.h"
-#include "intel_engine_types.h"
 
 struct drm_i915_private;
 struct i915_request;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 12893304c8f8..b5286f3d8146 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -8,9 +8,10 @@
 
 #include <drm/drm_drv.h>
 
+#include "gt/intel_engine.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
-#include "intel_ringbuffer.h"
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM i915
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 36726392e737..d4d308b6d1d8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -22,11 +22,12 @@
  *
  */
 
+#include "gt/intel_engine.h"
+
 #include "i915_vma.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
-#include "intel_ringbuffer.h"
 #include "intel_frontbuffer.h"
 
 #include <drm/drm_gem.h>
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 3045e0dee2a1..aa89a9adeffb 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -27,9 +27,11 @@
 
 #include <uapi/drm/i915_drm.h>
 
-#include "intel_engine_types.h"
+#include "gt/intel_engine_types.h"
+#include "gt/intel_context_types.h"
+#include "gt/intel_sseu.h"
+
 #include "intel_display.h"
-#include "intel_sseu.h"
 
 struct drm_printer;
 struct drm_i915_private;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3bd40a4a6739..24e70d46b872 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -46,7 +46,6 @@
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_atomic_plane.h"
 #include "intel_color.h"
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 37f60cb8e9e1..1b6d6403ee92 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -25,8 +25,9 @@
 #include <linux/circ_buf.h>
 #include <trace/events/dma_fence.h>
 
+#include "gt/intel_lrc_reg.h"
+
 #include "intel_guc_submission.h"
-#include "intel_lrc_reg.h"
 #include "i915_drv.h"
 
 #define GUC_PREEMPT_FINISHED		0x1
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
index aa5e6749c925..7d823a513b9c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/intel_guc_submission.h
@@ -27,9 +27,10 @@
 
 #include <linux/spinlock.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_selftest.h"
-#include "intel_engine_types.h"
 
 struct drm_i915_private;
 
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 25b80ffe71ad..13f823ff8083 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -22,11 +22,11 @@
  *
  */
 
+#include "gt/intel_reset.h"
 #include "intel_uc.h"
 #include "intel_guc_submission.h"
 #include "intel_guc.h"
 #include "i915_drv.h"
-#include "i915_reset.h"
 
 static void guc_free_load_err_log(struct intel_guc *guc);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index e1cb22f03e8e..6f52ca881173 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -24,8 +24,9 @@
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_reset.h"
-#include "../i915_selftest.h"
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+
 #include "i915_random.h"
 #include "igt_flush_test.h"
 #include "igt_live_test.h"
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
index 208a966da8ca..4f31b137c428 100644
--- a/drivers/gpu/drm/i915/selftests/igt_reset.c
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
@@ -6,8 +6,9 @@
 
 #include "igt_reset.h"
 
+#include "gt/intel_engine.h"
+
 #include "../i915_drv.h"
-#include "../intel_ringbuffer.h"
 
 void igt_global_reset_lock(struct drm_i915_private *i915)
 {
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
index 391777c76dc7..d312e7cdab68 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
@@ -9,9 +9,10 @@
 
 #include "../i915_selftest.h"
 
+#include "gt/intel_engine.h"
+
 #include "../i915_drv.h"
 #include "../i915_request.h"
-#include "../intel_ringbuffer.h"
 #include "../i915_gem_context.h"
 
 struct igt_spinner {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 60bbf8b4df40..f444ee5add27 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -25,7 +25,8 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 
-#include "mock_engine.h"
+#include "gt/mock_engine.h"
+
 #include "mock_context.h"
 #include "mock_request.h"
 #include "mock_gem_device.h"
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index d1a7c9608712..f739ba63057f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,7 +22,8 @@
  *
  */
 
-#include "mock_engine.h"
+#include "gt/mock_engine.h"
+
 #include "mock_request.h"
 
 struct i915_request *
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 68+ messages in thread

* [PATCH 08/32] drm/i915: Introduce struct intel_wakeref
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (5 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/ Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  9:45   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 09/32] drm/i915: Pull the GEM powermanagement coupling into its own file Chris Wilson
                   ` (27 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

For controlling runtime pm of the GT and engines, we would like to have
a callback that does extra work the first time we wake up and the last
time we drop the wakeref. This first/last access needs serialisation,
so we pair a mutex with the regular intel_wakeref_t tracker.

v2: Drop the _once naming and report the errors.
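
Roughly, the intended usage looks like the sketch below. This is not
part of the patch: engine_unpark/engine_park and the use_engine()
wrapper are hypothetical stand-ins for the users added by later
patches, written against the API introduced here.

	#include "intel_wakeref.h"

	/* Called under wf->mutex on the 0 -> 1 transition, with the
	 * runtime pm wakeref already acquired on our behalf. */
	static int engine_unpark(struct intel_wakeref *wf)
	{
		return 0; /* e.g. restore engine state */
	}

	/* Called under wf->mutex on the final put; returning an error
	 * keeps the wakeref (and runtime pm) held. */
	static int engine_park(struct intel_wakeref *wf)
	{
		return 0; /* e.g. flush and idle the engine */
	}

	static int use_engine(struct drm_i915_private *i915,
			      struct intel_wakeref *wf)
	{
		int err;

		err = intel_wakeref_get(i915, wf, engine_unpark);
		if (err)
			return err;

		/* ... the GT is guaranteed awake in here ... */

		return intel_wakeref_put(i915, wf, engine_park);
	}

Note the fast paths: intel_wakeref_get() only takes the mutex when the
count may transition 0 -> 1 (atomic_inc_not_zero), and
intel_wakeref_put() only when it drops to zero
(atomic_dec_and_mutex_lock), so steady-state get/put stays lock-free.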

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile             |   1 +
 drivers/gpu/drm/i915/Makefile.header-test |   3 +-
 drivers/gpu/drm/i915/i915_drv.h           |   3 +-
 drivers/gpu/drm/i915/intel_wakeref.c      |  61 ++++++++++
 drivers/gpu/drm/i915/intel_wakeref.h      | 133 ++++++++++++++++++++++
 5 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_wakeref.c
 create mode 100644 drivers/gpu/drm/i915/intel_wakeref.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 40130cf5c003..233bad5e361f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -50,6 +50,7 @@ i915-y += i915_drv.o \
 	  intel_device_info.o \
 	  intel_pm.o \
 	  intel_runtime_pm.o \
+	  intel_wakeref.o \
 	  intel_uncore.o
 
 # core library code
diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
index 96a5d90629ec..e6b3e7588860 100644
--- a/drivers/gpu/drm/i915/Makefile.header-test
+++ b/drivers/gpu/drm/i915/Makefile.header-test
@@ -31,7 +31,8 @@ header_test := \
 	intel_psr.h \
 	intel_sdvo.h \
 	intel_sprite.h \
-	intel_tv.h
+	intel_tv.h \
+	intel_wakeref.h
 
 quiet_cmd_header_test = HDRTEST $@
       cmd_header_test = echo "\#include \"$(<F)\"" > $@
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fad5306f07da..62a7e91acd7f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -74,6 +74,7 @@
 #include "intel_opregion.h"
 #include "intel_uc.h"
 #include "intel_uncore.h"
+#include "intel_wakeref.h"
 #include "intel_wopcm.h"
 
 #include "i915_gem.h"
@@ -134,8 +135,6 @@ bool i915_error_injected(void);
 	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
 		      fmt, ##__VA_ARGS__)
 
-typedef depot_stack_handle_t intel_wakeref_t;
-
 enum hpd_pin {
 	HPD_NONE = 0,
 	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
new file mode 100644
index 000000000000..1f94bc4ff9e4
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -0,0 +1,61 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_drv.h"
+#include "intel_wakeref.h"
+
+int __intel_wakeref_get_first(struct drm_i915_private *i915,
+			      struct intel_wakeref *wf,
+			      int (*fn)(struct intel_wakeref *wf))
+{
+	/*
+	 * Treat get/put as different subclasses, as we may need to run
+	 * the put callback from under the shrinker and do not want to
+	 * cross-contaminate that callback with any extra work performed
+	 * upon acquiring the wakeref.
+	 */
+	mutex_lock_nested(&wf->mutex, SINGLE_DEPTH_NESTING);
+	if (!atomic_read(&wf->count)) {
+		int err;
+
+		wf->wakeref = intel_runtime_pm_get(i915);
+
+		err = fn(wf);
+		if (unlikely(err)) {
+			intel_runtime_pm_put(i915, wf->wakeref);
+			mutex_unlock(&wf->mutex);
+			return err;
+		}
+
+		smp_mb__before_atomic(); /* release wf->count */
+	}
+	atomic_inc(&wf->count);
+	mutex_unlock(&wf->mutex);
+
+	return 0;
+}
+
+int __intel_wakeref_put_last(struct drm_i915_private *i915,
+			     struct intel_wakeref *wf,
+			     int (*fn)(struct intel_wakeref *wf))
+{
+	int err;
+
+	err = fn(wf);
+	if (likely(!err))
+		intel_runtime_pm_put(i915, wf->wakeref);
+	else
+		atomic_inc(&wf->count);
+	mutex_unlock(&wf->mutex);
+
+	return err;
+}
+
+void __intel_wakeref_init(struct intel_wakeref *wf, struct lock_class_key *key)
+{
+	__mutex_init(&wf->mutex, "wakeref", key);
+	atomic_set(&wf->count, 0);
+}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
new file mode 100644
index 000000000000..a979d638344b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -0,0 +1,133 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_WAKEREF_H
+#define INTEL_WAKEREF_H
+
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/stackdepot.h>
+
+struct drm_i915_private;
+
+typedef depot_stack_handle_t intel_wakeref_t;
+
+struct intel_wakeref {
+	atomic_t count;
+	struct mutex mutex;
+	intel_wakeref_t wakeref;
+};
+
+void __intel_wakeref_init(struct intel_wakeref *wf,
+			  struct lock_class_key *key);
+#define intel_wakeref_init(wf) do {					\
+	static struct lock_class_key __key;				\
+									\
+	__intel_wakeref_init((wf), &__key);				\
+} while (0)
+
+int __intel_wakeref_get_first(struct drm_i915_private *i915,
+			      struct intel_wakeref *wf,
+			      int (*fn)(struct intel_wakeref *wf));
+int __intel_wakeref_put_last(struct drm_i915_private *i915,
+			     struct intel_wakeref *wf,
+			     int (*fn)(struct intel_wakeref *wf));
+
+/**
+ * intel_wakeref_get: Acquire the wakeref
+ * @i915: the drm_i915_private device
+ * @wf: the wakeref
+ * @fn: callback for acquiring the wakeref, called only on the first acquire.
+ *
+ * Acquire a hold on the wakeref. The first user to do so, will acquire
+ * the runtime pm wakeref and then call the @fn underneath the wakeref
+ * mutex.
+ *
+ * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
+ * will be released and the acquisition unwound, and an error reported.
+ *
+ * Returns: 0 if the wakeref was acquired successfully, or a negative error
+ * code otherwise.
+ */
+static inline int
+intel_wakeref_get(struct drm_i915_private *i915,
+		  struct intel_wakeref *wf,
+		  int (*fn)(struct intel_wakeref *wf))
+{
+	if (unlikely(!atomic_inc_not_zero(&wf->count)))
+		return __intel_wakeref_get_first(i915, wf, fn);
+
+	return 0;
+}
+
+/**
+ * intel_wakeref_put: Release the wakeref
+ * @i915: the drm_i915_private device
+ * @wf: the wakeref
+ * @fn: callback for releasing the wakeref, called only on final release.
+ *
+ * Release our hold on the wakeref. When there are no more users,
+ * the runtime pm wakeref will be released after the @fn callback is called
+ * underneath the wakeref mutex.
+ *
+ * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
+ * is retained and an error reported.
+ *
+ * Returns: 0 if the wakeref was released successfully, or a negative error
+ * code otherwise.
+ */
+static inline int
+intel_wakeref_put(struct drm_i915_private *i915,
+		  struct intel_wakeref *wf,
+		  int (*fn)(struct intel_wakeref *wf))
+{
+	if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex))
+		return __intel_wakeref_put_last(i915, wf, fn);
+
+	return 0;
+}
+
+/**
+ * intel_wakeref_lock: Lock the wakeref (mutex)
+ * @wf: the wakeref
+ *
+ * Locks the wakeref to prevent it being acquired or released. New users
+ * can still adjust the counter, but the wakeref itself (and callback)
+ * cannot be acquired or released.
+ */
+static inline void
+intel_wakeref_lock(struct intel_wakeref *wf)
+	__acquires(wf->mutex)
+{
+	mutex_lock(&wf->mutex);
+}
+
+/**
+ * intel_wakeref_unlock: Unlock the wakeref
+ * @wf: the wakeref
+ *
+ * Releases a previously acquired intel_wakeref_lock().
+ */
+static inline void
+intel_wakeref_unlock(struct intel_wakeref *wf)
+	__releases(wf->mutex)
+{
+	mutex_unlock(&wf->mutex);
+}
+
+/**
+ * intel_wakeref_active: Query whether the wakeref is currently held
+ * @wf: the wakeref
+ *
+ * Returns: true if the wakeref is currently held.
+ */
+static inline bool
+intel_wakeref_active(struct intel_wakeref *wf)
+{
+	return atomic_read(&wf->count);
+}
+
+#endif /* INTEL_WAKEREF_H */
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 68+ messages in thread

* [PATCH 09/32] drm/i915: Pull the GEM powermanagement coupling into its own file
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (6 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 08/32] drm/i915: Introduce struct intel_wakeref Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 10/32] drm/i915: Introduce context->enter() and context->exit() Chris Wilson
                   ` (26 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Split out the powermanagement portion (GT wakeref, suspend/resume) of
GEM from i915_gem.c into its own file.
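
For orientation, the interface the new i915_gem_pm.h exposes looks
roughly like the sketch below, inferred from the call sites in the
diff (i915_gem_init__pm() and i915_gem_load_power_context()); treat
it as a sketch rather than the verbatim header:

	/* i915_gem_pm.h (sketch, not the verbatim file) */
	struct drm_i915_private;

	void i915_gem_init__pm(struct drm_i915_private *i915);
	bool i915_gem_load_power_context(struct drm_i915_private *i915);

	void i915_gem_park(struct drm_i915_private *i915);
	void i915_gem_unpark(struct drm_i915_private *i915);

	void i915_gem_suspend(struct drm_i915_private *i915);
	void i915_gem_suspend_late(struct drm_i915_private *i915);
	void i915_gem_resume(struct drm_i915_private *i915);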

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/Makefile.header-test     |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c           |   4 +-
 drivers/gpu/drm/i915/i915_drv.h               |  12 +-
 drivers/gpu/drm/i915/i915_gem.c               | 363 +----------------
 drivers/gpu/drm/i915/i915_gem_pm.c            | 365 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_pm.h            |  28 ++
 .../gpu/drm/i915/selftests/i915_gem_context.c |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   8 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  10 +-
 10 files changed, 418 insertions(+), 376 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_pm.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_pm.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 233bad5e361f..858642c7bc40 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -97,6 +97,7 @@ i915-y += \
 	  i915_gem_internal.o \
 	  i915_gem.o \
 	  i915_gem_object.o \
+	  i915_gem_pm.o \
 	  i915_gem_render_state.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
index e6b3e7588860..702e3a7ade4c 100644
--- a/drivers/gpu/drm/i915/Makefile.header-test
+++ b/drivers/gpu/drm/i915/Makefile.header-test
@@ -5,6 +5,7 @@
 header_test := \
 	i915_active_types.h \
 	i915_gem_context_types.h \
+	i915_gem_pm.h \
 	i915_priolist_types.h \
 	i915_scheduler_types.h \
 	i915_timeline_types.h \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3f039758b152..8dcba78fb43b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3941,8 +3941,8 @@ i915_drop_caches_set(void *data, u64 val)
 	if (val & DROP_IDLE) {
 		do {
 			if (READ_ONCE(i915->gt.active_requests))
-				flush_delayed_work(&i915->gt.retire_work);
-			drain_delayed_work(&i915->gt.idle_work);
+				flush_delayed_work(&i915->gem.retire_work);
+			drain_delayed_work(&i915->gem.idle_work);
 		} while (READ_ONCE(i915->gt.awake));
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 62a7e91acd7f..cbae9be052e0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2020,6 +2020,12 @@ struct drm_i915_private {
 		 */
 		intel_wakeref_t awake;
 
+		ktime_t last_init_time;
+
+		struct i915_vma *scratch;
+	} gt;
+
+	struct {
 		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
@@ -2037,11 +2043,7 @@ struct drm_i915_private {
 		 * off the idle_work.
 		 */
 		struct delayed_work idle_work;
-
-		ktime_t last_init_time;
-
-		struct i915_vma *scratch;
-	} gt;
+	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
 	struct {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9554960977a3..74b99126830b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,7 +46,7 @@
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gemfs.h"
-#include "i915_globals.h"
+#include "i915_gem_pm.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -103,105 +103,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
-static void __i915_gem_park(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref;
-
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(i915->gt.active_requests);
-	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
-
-	if (!i915->gt.awake)
-		return;
-
-	/*
-	 * Be paranoid and flush a concurrent interrupt to make sure
-	 * we don't reactivate any irq tasklets after parking.
-	 *
-	 * FIXME: Note that even though we have waited for execlists to be idle,
-	 * there may still be an in-flight interrupt even though the CSB
-	 * is now empty. synchronize_irq() makes sure that a residual interrupt
-	 * is completed before we continue, but it doesn't prevent the HW from
-	 * raising a spurious interrupt later. To complete the shield we should
-	 * coordinate disabling the CS irq with flushing the interrupts.
-	 */
-	synchronize_irq(i915->drm.irq);
-
-	intel_engines_park(i915);
-	i915_timelines_park(i915);
-
-	i915_pmu_gt_parked(i915);
-	i915_vma_parked(i915);
-
-	wakeref = fetch_and_zero(&i915->gt.awake);
-	GEM_BUG_ON(!wakeref);
-
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_idle(i915);
-
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
-
-	i915_globals_park();
-}
-
-void i915_gem_park(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(i915->gt.active_requests);
-
-	if (!i915->gt.awake)
-		return;
-
-	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
-	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
-}
-
-void i915_gem_unpark(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915->gt.active_requests);
-	assert_rpm_wakelock_held(i915);
-
-	if (i915->gt.awake)
-		return;
-
-	/*
-	 * It seems that the DMC likes to transition between the DC states a lot
-	 * when there are no connected displays (no active power domains) during
-	 * command submission.
-	 *
-	 * This activity has negative impact on the performance of the chip with
-	 * huge latencies observed in the interrupt handler and elsewhere.
-	 *
-	 * Work around it by grabbing a GT IRQ power domain whilst there is any
-	 * GT activity, preventing any DC state transitions.
-	 */
-	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
-	GEM_BUG_ON(!i915->gt.awake);
-
-	i915_globals_unpark();
-
-	intel_enable_gt_powersave(i915);
-	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
-	i915_pmu_gt_unparked(i915);
-
-	intel_engines_unpark(i915);
-
-	i915_queue_hangcheck(i915);
-
-	queue_delayed_work(i915->wq,
-			   &i915->gt.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
@@ -2088,7 +1989,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 		if (!err)
 			break;
 
-	} while (flush_delayed_work(&dev_priv->gt.retire_work));
+	} while (flush_delayed_work(&dev_priv->gem.retire_work));
 
 	return err;
 }
@@ -2871,132 +2772,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
-static void
-i915_gem_retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.retire_work.work);
-	struct drm_device *dev = &dev_priv->drm;
-
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&dev->struct_mutex)) {
-		i915_retire_requests(dev_priv);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	/*
-	 * Keep the retire handler running until we are finally idle.
-	 * We do not need to do this test under locking as in the worst-case
-	 * we queue the retire worker once too often.
-	 */
-	if (READ_ONCE(dev_priv->gt.awake))
-		queue_delayed_work(dev_priv->wq,
-				   &dev_priv->gt.retire_work,
-				   round_jiffies_up_relative(HZ));
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
-					  unsigned long mask)
-{
-	bool result = true;
-
-	/*
-	 * Even if we fail to switch, give whatever is running a small chance
-	 * to save itself before we report the failure. Yes, this may be a
-	 * false positive due to e.g. ENOMEM, caveat emptor!
-	 */
-	if (i915_gem_switch_to_kernel_context(i915, mask))
-		result = false;
-
-	if (i915_gem_wait_for_idle(i915,
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT))
-		result = false;
-
-	if (!result) {
-		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
-			dev_err(i915->drm.dev,
-				"Failed to idle engines, declaring wedged!\n");
-			GEM_TRACE_DUMP();
-		}
-
-		/* Forcibly cancel outstanding work and leave the gpu quiet. */
-		i915_gem_set_wedged(i915);
-	}
-
-	i915_retire_requests(i915); /* ensure we flush after wedging */
-	return result;
-}
-
-static bool load_power_context(struct drm_i915_private *i915)
-{
-	/* Force loading the kernel context on all engines */
-	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
-		return false;
-
-	/*
-	 * Immediately park the GPU so that we enable powersaving and
-	 * treat it as idle. The next time we issue a request, we will
-	 * unpark and start using the engine->pinned_default_state, otherwise
-	 * it is in limbo and an early reset may fail.
-	 */
-	__i915_gem_park(i915);
-
-	return true;
-}
-
-static void
-i915_gem_idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gt.idle_work.work);
-	bool rearm_hangcheck;
-
-	if (!READ_ONCE(i915->gt.awake))
-		return;
-
-	if (READ_ONCE(i915->gt.active_requests))
-		return;
-
-	rearm_hangcheck =
-		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
-	if (!mutex_trylock(&i915->drm.struct_mutex)) {
-		/* Currently busy, come back later */
-		mod_delayed_work(i915->wq,
-				 &i915->gt.idle_work,
-				 msecs_to_jiffies(50));
-		goto out_rearm;
-	}
-
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. Should anything unfortunate happen
-	 * while we are idle (such as the GPU being power cycled), no users
-	 * will be harmed.
-	 */
-	if (!work_pending(&i915->gt.idle_work.work) &&
-	    !i915->gt.active_requests) {
-		++i915->gt.active_requests; /* don't requeue idle */
-
-		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
-
-		if (!--i915->gt.active_requests) {
-			__i915_gem_park(i915);
-			rearm_hangcheck = false;
-		}
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-out_rearm:
-	if (rearm_hangcheck) {
-		GEM_BUG_ON(!i915->gt.awake);
-		i915_queue_hangcheck(i915);
-	}
-}
-
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
 	struct drm_i915_private *i915 = to_i915(gem->dev);
@@ -4412,133 +4187,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-void i915_gem_suspend(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref;
-
-	GEM_TRACE("\n");
-
-	wakeref = intel_runtime_pm_get(i915);
-
-	flush_workqueue(i915->wq);
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	/*
-	 * We have to flush all the executing contexts to main memory so
-	 * that they can saved in the hibernation image. To ensure the last
-	 * context image is coherent, we have to switch away from it. That
-	 * leaves the i915->kernel_context still active when
-	 * we actually suspend, and its image in memory may not match the GPU
-	 * state. Fortunately, the kernel_context is disposable and we do
-	 * not rely on its state.
-	 */
-	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_reset_flush(i915);
-
-	drain_delayed_work(&i915->gt.retire_work);
-
-	/*
-	 * As the idle_work is rearming if it detects a race, play safe and
-	 * repeat the flush until it is definitely idle.
-	 */
-	drain_delayed_work(&i915->gt.idle_work);
-
-	/*
-	 * Assert that we successfully flushed all the work and
-	 * reset the GPU back to its idle, low power state.
-	 */
-	GEM_BUG_ON(i915->gt.awake);
-
-	intel_uc_suspend(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-void i915_gem_suspend_late(struct drm_i915_private *i915)
-{
-	struct drm_i915_gem_object *obj;
-	struct list_head *phases[] = {
-		&i915->mm.unbound_list,
-		&i915->mm.bound_list,
-		NULL
-	}, **phase;
-
-	/*
-	 * Neither the BIOS, ourselves, nor any other kernel
-	 * expects the system to be in execlists mode on startup,
-	 * so we need to reset the GPU back to legacy mode. And the only
-	 * known way to disable logical contexts is through a GPU reset.
-	 *
-	 * So in order to leave the system in a known default configuration,
-	 * always reset the GPU upon unload and suspend. Afterwards we then
-	 * clean up the GEM state tracking, flushing off the requests and
-	 * leaving the system in a known idle state.
-	 *
-	 * Note that it is of the utmost importance that the GPU is idle and
-	 * all stray writes are flushed *before* we dismantle the backing
-	 * storage for the pinned objects.
-	 *
-	 * However, since we are uncertain that resetting the GPU on older
-	 * machines is a good idea, we don't - just in case it leaves the
-	 * machine in an unusable condition.
-	 */
-
-	mutex_lock(&i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
-			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	intel_uc_sanitize(i915);
-	i915_gem_sanitize(i915);
-}
-
-void i915_gem_resume(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	WARN_ON(i915->gt.awake);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
-	i915_gem_restore_gtt_mappings(i915);
-	i915_gem_restore_fences(i915);
-
-	/*
-	 * As we didn't flush the kernel context before suspend, we cannot
-	 * guarantee that the context image is complete. So let's just reset
-	 * it and start again.
-	 */
-	intel_gt_resume(i915);
-
-	if (i915_gem_init_hw(i915))
-		goto err_wedged;
-
-	intel_uc_resume(i915);
-
-	/* Always reload a context for powersaving. */
-	if (!load_power_context(i915))
-		goto err_wedged;
-
-out_unlock:
-	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&i915->drm.struct_mutex);
-	return;
-
-err_wedged:
-	if (!i915_reset_failed(i915)) {
-		dev_err(i915->drm.dev,
-			"Failed to re-initialize GPU, declaring it wedged!\n");
-		i915_gem_set_wedged(i915);
-	}
-	goto out_unlock;
-}
-
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
 {
 	if (INTEL_GEN(dev_priv) < 5 ||
@@ -4721,7 +4369,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 	/* Flush the default context image to memory, and enable powersaving. */
-	if (!load_power_context(i915)) {
+	if (!i915_gem_load_power_context(i915)) {
 		err = -EIO;
 		goto err_active;
 	}
@@ -5136,11 +4784,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
 
 	i915_gem_init__mm(dev_priv);
+	i915_gem_init__pm(dev_priv);
 
-	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
-			  i915_gem_retire_work_handler);
-	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
-			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 	mutex_init(&dev_priv->gpu_error.wedge_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
new file mode 100644
index 000000000000..9fb0e8d567a2
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -0,0 +1,365 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_pm.h"
+#include "i915_globals.h"
+#include "intel_pm.h"
+
+static void __i915_gem_park(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref;
+
+	GEM_TRACE("\n");
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(i915->gt.active_requests);
+	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
+
+	if (!i915->gt.awake)
+		return;
+
+	/*
+	 * Be paranoid and flush a concurrent interrupt to make sure
+	 * we don't reactivate any irq tasklets after parking.
+	 *
+	 * FIXME: Note that even though we have waited for execlists to be idle,
+	 * there may still be an in-flight interrupt even though the CSB
+	 * is now empty. synchronize_irq() makes sure that a residual interrupt
+	 * is completed before we continue, but it doesn't prevent the HW from
+	 * raising a spurious interrupt later. To complete the shield we should
+	 * coordinate disabling the CS irq with flushing the interrupts.
+	 */
+	synchronize_irq(i915->drm.irq);
+
+	intel_engines_park(i915);
+	i915_timelines_park(i915);
+
+	i915_pmu_gt_parked(i915);
+	i915_vma_parked(i915);
+
+	wakeref = fetch_and_zero(&i915->gt.awake);
+	GEM_BUG_ON(!wakeref);
+
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_idle(i915);
+
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+
+	i915_globals_park();
+}
+
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
+					  unsigned long mask)
+{
+	bool result = true;
+
+	/*
+	 * Even if we fail to switch, give whatever is running a small chance
+	 * to save itself before we report the failure. Yes, this may be a
+	 * false positive due to e.g. ENOMEM, caveat emptor!
+	 */
+	if (i915_gem_switch_to_kernel_context(i915, mask))
+		result = false;
+
+	if (i915_gem_wait_for_idle(i915,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT))
+		result = false;
+
+	if (!result) {
+		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
+			dev_err(i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
+		}
+
+		/* Forcibly cancel outstanding work and leave the gpu quiet. */
+		i915_gem_set_wedged(i915);
+	}
+
+	i915_retire_requests(i915); /* ensure we flush after wedging */
+	return result;
+}
+
+static void idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gem.idle_work.work);
+	bool rearm_hangcheck;
+
+	if (!READ_ONCE(i915->gt.awake))
+		return;
+
+	if (READ_ONCE(i915->gt.active_requests))
+		return;
+
+	rearm_hangcheck =
+		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	if (!mutex_trylock(&i915->drm.struct_mutex)) {
+		/* Currently busy, come back later */
+		mod_delayed_work(i915->wq,
+				 &i915->gem.idle_work,
+				 msecs_to_jiffies(50));
+		goto out_rearm;
+	}
+
+	/*
+	 * Flush out the last user context, leaving only the pinned
+	 * kernel context resident. Should anything unfortunate happen
+	 * while we are idle (such as the GPU being power cycled), no users
+	 * will be harmed.
+	 */
+	if (!work_pending(&i915->gem.idle_work.work) &&
+	    !i915->gt.active_requests) {
+		++i915->gt.active_requests; /* don't requeue idle */
+
+		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
+
+		if (!--i915->gt.active_requests) {
+			__i915_gem_park(i915);
+			rearm_hangcheck = false;
+		}
+	}
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+out_rearm:
+	if (rearm_hangcheck) {
+		GEM_BUG_ON(!i915->gt.awake);
+		i915_queue_hangcheck(i915);
+	}
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gem.retire_work.work);
+
+	/* Come back later if the device is busy... */
+	if (mutex_trylock(&i915->drm.struct_mutex)) {
+		i915_retire_requests(i915);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	/*
+	 * Keep the retire handler running until we are finally idle.
+	 * We do not need to do this test under locking as in the worst-case
+	 * we queue the retire worker once too often.
+	 */
+	if (READ_ONCE(i915->gt.awake))
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+}
+
+void i915_gem_park(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(i915->gt.active_requests);
+
+	if (!i915->gt.awake)
+		return;
+
+	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
+	mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100));
+}
+
+void i915_gem_unpark(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!i915->gt.active_requests);
+	assert_rpm_wakelock_held(i915);
+
+	if (i915->gt.awake)
+		return;
+
+	/*
+	 * It seems that the DMC likes to transition between the DC states a lot
+	 * when there are no connected displays (no active power domains) during
+	 * command submission.
+	 *
+	 * This activity has negative impact on the performance of the chip with
+	 * huge latencies observed in the interrupt handler and elsewhere.
+	 *
+	 * Work around it by grabbing a GT IRQ power domain whilst there is any
+	 * GT activity, preventing any DC state transitions.
+	 */
+	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	i915_globals_unpark();
+
+	intel_enable_gt_powersave(i915);
+	i915_update_gfx_val(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_busy(i915);
+	i915_pmu_gt_unparked(i915);
+
+	intel_engines_unpark(i915);
+
+	i915_queue_hangcheck(i915);
+
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915)
+{
+	/* Force loading the kernel context on all engines */
+	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
+		return false;
+
+	/*
+	 * Immediately park the GPU so that we enable powersaving and
+	 * treat it as idle. The next time we issue a request, we will
+	 * unpark and start using the engine->pinned_default_state, otherwise
+	 * it is in limbo and an early reset may fail.
+	 */
+	__i915_gem_park(i915);
+
+	return true;
+}
+
+void i915_gem_suspend(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref;
+
+	GEM_TRACE("\n");
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/*
+	 * We have to flush all the executing contexts to main memory so
+	 * that they can be saved in the hibernation image. To ensure the last
+	 * context image is coherent, we have to switch away from it. That
+	 * leaves the i915->kernel_context still active when
+	 * we actually suspend, and its image in memory may not match the GPU
+	 * state. Fortunately, the kernel_context is disposable and we do
+	 * not rely on its state.
+	 */
+	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_reset_flush(i915);
+
+	drain_delayed_work(&i915->gem.retire_work);
+
+	/*
+	 * As the idle_work is rearming if it detects a race, play safe and
+	 * repeat the flush until it is definitely idle.
+	 */
+	drain_delayed_work(&i915->gem.idle_work);
+
+	flush_workqueue(i915->wq);
+
+	/*
+	 * Assert that we successfully flushed all the work and
+	 * reset the GPU back to its idle, low power state.
+	 */
+	GEM_BUG_ON(i915->gt.awake);
+
+	intel_uc_suspend(i915);
+
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+	struct drm_i915_gem_object *obj;
+	struct list_head *phases[] = {
+		&i915->mm.unbound_list,
+		&i915->mm.bound_list,
+		NULL
+	}, **phase;
+
+	/*
+	 * Neither the BIOS, ourselves, nor any other kernel
+	 * expects the system to be in execlists mode on startup,
+	 * so we need to reset the GPU back to legacy mode. And the only
+	 * known way to disable logical contexts is through a GPU reset.
+	 *
+	 * So in order to leave the system in a known default configuration,
+	 * always reset the GPU upon unload and suspend. Afterwards we then
+	 * clean up the GEM state tracking, flushing off the requests and
+	 * leaving the system in a known idle state.
+	 *
+	 * Note that it is of the utmost importance that the GPU is idle and
+	 * all stray writes are flushed *before* we dismantle the backing
+	 * storage for the pinned objects.
+	 *
+	 * However, since we are uncertain that resetting the GPU on older
+	 * machines is a good idea, we don't - just in case it leaves the
+	 * machine in an unusable condition.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	for (phase = phases; *phase; phase++) {
+		list_for_each_entry(obj, *phase, mm.link)
+			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	intel_uc_sanitize(i915);
+	i915_gem_sanitize(i915);
+}
+
+void i915_gem_resume(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	WARN_ON(i915->gt.awake);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
+	i915_gem_restore_gtt_mappings(i915);
+	i915_gem_restore_fences(i915);
+
+	/*
+	 * As we didn't flush the kernel context before suspend, we cannot
+	 * guarantee that the context image is complete. So let's just reset
+	 * it and start again.
+	 */
+	intel_gt_resume(i915);
+
+	if (i915_gem_init_hw(i915))
+		goto err_wedged;
+
+	intel_uc_resume(i915);
+
+	/* Always reload a context for powersaving. */
+	if (!i915_gem_load_power_context(i915))
+		goto err_wedged;
+
+out_unlock:
+	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return;
+
+err_wedged:
+	if (!i915_reset_failed(i915)) {
+		dev_err(i915->drm.dev,
+			"Failed to re-initialize GPU, declaring it wedged!\n");
+		i915_gem_set_wedged(i915);
+	}
+	goto out_unlock;
+}
+
+void i915_gem_init__pm(struct drm_i915_private *i915)
+{
+	INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h
new file mode 100644
index 000000000000..52f65e3f06b5
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_pm.h
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_PM_H__
+#define __I915_GEM_PM_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct work_struct;
+
+void i915_gem_init__pm(struct drm_i915_private *i915);
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915);
+void i915_gem_resume(struct drm_i915_private *i915);
+
+void i915_gem_unpark(struct drm_i915_private *i915);
+void i915_gem_park(struct drm_i915_private *i915);
+
+void i915_gem_idle_work_handler(struct work_struct *work);
+
+void i915_gem_suspend(struct drm_i915_private *i915);
+void i915_gem_suspend_late(struct drm_i915_private *i915);
+
+#endif /* __I915_GEM_PM_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 6f52ca881173..9d646fa1b74e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1658,7 +1658,7 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
 		/* XXX Bonus points for proving we are the kernel context! */
 
 		mutex_unlock(&i915->drm.struct_mutex);
-		drain_delayed_work(&i915->gt.idle_work);
+		drain_delayed_work(&i915->gem.idle_work);
 		mutex_lock(&i915->drm.struct_mutex);
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 971148fbe6f5..12fc53c694a6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -514,8 +514,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 	}
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	cancel_delayed_work_sync(&i915->gt.retire_work);
-	cancel_delayed_work_sync(&i915->gt.idle_work);
+	cancel_delayed_work_sync(&i915->gem.retire_work);
+	cancel_delayed_work_sync(&i915->gem.idle_work);
 }
 
 static int igt_mmap_offset_exhaustion(void *arg)
@@ -617,9 +617,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
 out_park:
 	mutex_lock(&i915->drm.struct_mutex);
 	if (--i915->gt.active_requests)
-		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
+		queue_delayed_work(i915->wq, &i915->gem.retire_work, 0);
 	else
-		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
+		queue_delayed_work(i915->wq, &i915->gem.idle_work, 0);
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_shrinker_register(i915);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index f444ee5add27..fb677b4019a0 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -59,8 +59,8 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	drain_delayed_work(&i915->gt.retire_work);
-	drain_delayed_work(&i915->gt.idle_work);
+	drain_delayed_work(&i915->gem.retire_work);
+	drain_delayed_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -111,7 +111,7 @@ static void mock_retire_work_handler(struct work_struct *work)
 static void mock_idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gt.idle_work.work);
+		container_of(work, typeof(*i915), gem.idle_work.work);
 
 	i915->gt.active_engines = 0;
 }
@@ -197,8 +197,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_init_contexts(i915);
 
-	INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler);
-	INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.idle_work, mock_idle_work_handler);
 
 	i915->gt.awake = true;
 
-- 
2.20.1


* [PATCH 10/32] drm/i915: Introduce context->enter() and context->exit()
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (7 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 09/32] drm/i915: Pull the GEM powermanagement coupling into its own file Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 11/32] drm/i915: Pass intel_context to i915_request_create() Chris Wilson
                   ` (25 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

We wish to start segregating the power management into different control
domains, both with respect to the hardware and the user interface. The
first step is that, at the lowest level of the request flow, we want to
process a context event (and not a global GEM operation). In this patch,
we introduce the context callbacks that in future patches will be
redirected to per-engine interfaces, leading to global operations as
required.

The intent is that this will be guarded by the timeline->mutex, except
that retiring has not quite finished transitioning over from being
guarded by struct_mutex. So at the moment it is protected by
struct_mutex with a reminder to switch.
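
The callbacks amount to a first-in/last-out refcount: the first request
on a context calls the backend's enter() hook, and the final retirement
calls exit(). A minimal standalone sketch of the pattern (illustrative
only; the toy_* names below are not part of the patch):

	struct toy_context {
		unsigned int active_count; /* notionally timeline->mutex */
		void (*enter)(struct toy_context *ce);
		void (*exit)(struct toy_context *ce);
	};

	static void toy_context_enter(struct toy_context *ce)
	{
		if (!ce->active_count++)	/* 0 -> 1: first user */
			ce->enter(ce);
	}

	static void toy_context_exit(struct toy_context *ce)
	{
		if (!--ce->active_count)	/* 1 -> 0: last user */
			ce->exit(ce);
	}

The engine backends thus observe a single enter when a context first
becomes active and a single exit once it finally idles, however many
requests come and go in between.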

v2: Rename default handlers to intel_context_enter_engine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 17 ++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context.h       | 21 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  5 +++++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  3 +++
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  3 +++
 drivers/gpu/drm/i915/gt/mock_engine.c         |  3 +++
 drivers/gpu/drm/i915/i915_request.c           | 22 ++++---------------
 7 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ebd1e5919a4a..4410e20e8e13 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -266,3 +266,20 @@ int __init i915_global_context_init(void)
 	i915_global_register(&global.base);
 	return 0;
 }
+
+void intel_context_enter_engine(struct intel_context *ce)
+{
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+
+	if (!i915->gt.active_requests++)
+		i915_gem_unpark(i915);
+}
+
+void intel_context_exit_engine(struct intel_context *ce)
+{
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+
+	GEM_BUG_ON(!i915->gt.active_requests);
+	if (!--i915->gt.active_requests)
+		i915_gem_park(i915);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index ebc861b1a49e..b732cf99efcb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -73,6 +73,27 @@ static inline void __intel_context_pin(struct intel_context *ce)
 
 void intel_context_unpin(struct intel_context *ce);
 
+void intel_context_enter_engine(struct intel_context *ce);
+void intel_context_exit_engine(struct intel_context *ce);
+
+static inline void intel_context_enter(struct intel_context *ce)
+{
+	if (!ce->active_count++)
+		ce->ops->enter(ce);
+}
+
+static inline void intel_context_mark_active(struct intel_context *ce)
+{
+	++ce->active_count;
+}
+
+static inline void intel_context_exit(struct intel_context *ce)
+{
+	GEM_BUG_ON(!ce->active_count);
+	if (!--ce->active_count)
+		ce->ops->exit(ce);
+}
+
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 9ec4f787c908..f02d27734e3b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -25,6 +25,9 @@ struct intel_context_ops {
 	int (*pin)(struct intel_context *ce);
 	void (*unpin)(struct intel_context *ce);
 
+	void (*enter)(struct intel_context *ce);
+	void (*exit)(struct intel_context *ce);
+
 	void (*reset)(struct intel_context *ce);
 	void (*destroy)(struct kref *kref);
 };
@@ -46,6 +49,8 @@ struct intel_context {
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
 
+	unsigned int active_count; /* notionally protected by timeline->mutex */
+
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5cadf8f6a23d..edec7f183688 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1315,6 +1315,9 @@ static const struct intel_context_ops execlists_context_ops = {
 	.pin = execlists_context_pin,
 	.unpin = execlists_context_unpin,
 
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
 	.reset = execlists_context_reset,
 	.destroy = execlists_context_destroy,
 };
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index c1214fd25702..c6d2d7ff7f32 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1516,6 +1516,9 @@ static const struct intel_context_ops ring_context_ops = {
 	.pin = ring_context_pin,
 	.unpin = ring_context_unpin,
 
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
 	.reset = ring_context_reset,
 	.destroy = ring_context_destroy,
 };
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 414afd2f27fe..bcfeb0c67997 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -157,6 +157,9 @@ static const struct intel_context_ops mock_context_ops = {
 	.pin = mock_context_pin,
 	.unpin = mock_context_unpin,
 
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
 	.destroy = mock_context_destroy,
 };
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 74ae698c1f95..8129886992ca 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -131,19 +131,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void reserve_gt(struct drm_i915_private *i915)
-{
-	if (!i915->gt.active_requests++)
-		i915_gem_unpark(i915);
-}
-
-static void unreserve_gt(struct drm_i915_private *i915)
-{
-	GEM_BUG_ON(!i915->gt.active_requests);
-	if (!--i915->gt.active_requests)
-		i915_gem_park(i915);
-}
-
 static void advance_ring(struct i915_request *request)
 {
 	struct intel_ring *ring = request->ring;
@@ -301,11 +288,10 @@ static void i915_request_retire(struct i915_request *request)
 
 	i915_request_remove_from_client(request);
 
-	intel_context_unpin(request->hw_context);
-
 	__retire_engine_upto(request->engine, request);
 
-	unreserve_gt(request->i915);
+	intel_context_exit(request->hw_context);
+	intel_context_unpin(request->hw_context);
 
 	i915_sched_node_fini(&request->sched);
 	i915_request_put(request);
@@ -660,8 +646,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	if (IS_ERR(ce))
 		return ERR_CAST(ce);
 
-	reserve_gt(i915);
 	mutex_lock(&ce->ring->timeline->mutex);
+	intel_context_enter(ce);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
@@ -792,8 +778,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
+	intel_context_exit(ce);
 	mutex_unlock(&ce->ring->timeline->mutex);
-	unreserve_gt(i915);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
-- 
2.20.1


* [PATCH 11/32] drm/i915: Pass intel_context to i915_request_create()
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (8 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 10/32] drm/i915: Introduce context->enter() and context->exit() Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy Chris Wilson
                   ` (24 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Start acquiring the logical intel_context and using that as our primary
means for request allocation. This is the initial step to allow us to
avoid requiring struct_mutex for request allocation on the
perma-pinned kernel context, but it also provides a foundation for
breaking up the complex request allocation to handle different scenarios
inside execbuf.

For the purpose of emitting a request from inside retirement (see the
next patch for engine power management), we also need to lift control
over the timeline mutex to the caller.
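
For a caller that already holds a pinned intel_context, the intended
flow then looks roughly as follows (an illustrative sketch with error
handling elided, not a hunk from this patch):

	struct i915_request *rq;

	/* i915_request_create() takes ce->ring->timeline->mutex ... */
	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* ... commands are emitted into rq here ... */

	/* ... and i915_request_add() releases the timeline mutex. */
	i915_request_add(rq);

Callers that need to control the locking themselves (such as emitting a
request from inside retirement) can instead pair
intel_context_timeline_lock()/unlock() with __i915_request_create().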

v2: Note that the request carries the active reference upon construction.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.h       |  12 +
 drivers/gpu/drm/i915/gt/intel_reset.c         |   2 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   3 -
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c       |   4 +-
 drivers/gpu/drm/i915/i915_perf.c              |   2 +-
 drivers/gpu/drm/i915/i915_request.c           | 247 ++++++++++--------
 drivers/gpu/drm/i915/i915_request.h           |   7 +
 drivers/gpu/drm/i915/intel_overlay.c          |   5 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |   2 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   2 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   9 +-
 .../gpu/drm/i915/selftests/i915_timeline.c    |   4 +-
 14 files changed, 177 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index b732cf99efcb..60379eb37949 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -105,4 +105,16 @@ static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
+static inline void intel_context_timeline_lock(struct intel_context *ce)
+	__acquires(&ce->ring->timeline->mutex)
+{
+	mutex_lock(&ce->ring->timeline->mutex);
+}
+
+static inline void intel_context_timeline_unlock(struct intel_context *ce)
+	__releases(&ce->ring->timeline->mutex)
+{
+	mutex_unlock(&ce->ring->timeline->mutex);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 9731a2295639..996164d07397 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -786,7 +786,7 @@ static void restart_work(struct work_struct *work)
 		if (!intel_engine_is_idle(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (!IS_ERR(rq))
 			i915_request_add(rq);
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index c6d2d7ff7f32..b2bb7d4bfbe3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1778,7 +1778,6 @@ static int switch_context(struct i915_request *rq)
 	u32 hw_flags = 0;
 	int ret, i;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
 	if (ppgtt) {
@@ -1908,8 +1907,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	struct i915_request *target;
 	long timeout;
 
-	lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
-
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index f46ed0e2f07c..364696221fd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1356,7 +1356,7 @@ static int engine_wa_list_verify(struct intel_engine_cs *engine,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	rq = i915_request_alloc(engine, engine->kernel_context->gem_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 37dff694456c..3eb1a664b5fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -942,7 +942,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 		struct intel_ring *ring;
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
 
@@ -1188,7 +1188,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	/* Submitting requests etc needs the hw awake. */
 	wakeref = intel_runtime_pm_get(i915);
 
-	rq = i915_request_alloc(ce->engine, i915->kernel_context);
+	rq = i915_request_create(ce->engine->kernel_context);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto out_put;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a87f790335c1..328a740e72cb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1762,7 +1762,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	 * Apply the configuration by doing one context restore of the edited
 	 * context image.
 	 */
-	rq = i915_request_alloc(engine, dev_priv->kernel_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8129886992ca..672c9ea6c24f 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -582,7 +582,7 @@ static void ring_retire_requests(struct intel_ring *ring)
 }
 
 static noinline struct i915_request *
-i915_request_alloc_slow(struct intel_context *ce)
+request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 {
 	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
@@ -590,6 +590,9 @@ i915_request_alloc_slow(struct intel_context *ce)
 	if (list_empty(&ring->request_list))
 		goto out;
 
+	if (!gfpflags_allow_blocking(gfp))
+		goto out;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
@@ -598,62 +601,21 @@ i915_request_alloc_slow(struct intel_context *ce)
 	ring_retire_requests(ring);
 
 out:
-	return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
 struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+__i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_timeline *tl;
+	struct i915_timeline *tl = ce->ring->timeline;
 	struct i915_request *rq;
 	u32 seqno;
 	int ret;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
-
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
+	might_sleep_if(gfpflags_allow_blocking(gfp));
 
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = intel_context_pin(ctx, engine);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	mutex_lock(&ce->ring->timeline->mutex);
-	intel_context_enter(ce);
-
-	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
-		i915_request_retire(rq);
+	/* Check that the caller provided an already pinned context */
+	__intel_context_pin(ce);
 
 	/*
 	 * Beware: Dragons be flying overhead.
@@ -685,30 +647,26 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	rq = kmem_cache_alloc(global.slab_requests,
-			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = i915_request_alloc_slow(ce);
+		rq = request_alloc_slow(ce, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
 		}
 	}
 
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->execute_cb);
-
-	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
 		goto err_free;
 
-	rq->i915 = i915;
-	rq->engine = engine;
-	rq->gem_context = ctx;
+	rq->i915 = ce->engine->i915;
 	rq->hw_context = ce;
+	rq->gem_context = ce->gem_context;
+	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
+	GEM_BUG_ON(rq->timeline == &ce->engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -729,6 +687,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->capture_list = NULL;
 	rq->waitboost = false;
 
+	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->execute_cb);
+
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -741,7 +702,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * around inside i915_request_add() there is sufficient space at
 	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
+	rq->reserved_space =
+		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -751,20 +713,16 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
-	ret = engine->request_alloc(rq);
+	ret = rq->engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Check that we didn't interrupt ourselves with a new request */
-	lockdep_assert_held(&rq->timeline->mutex);
-	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
-	rq->cookie = lockdep_pin_lock(&rq->timeline->mutex);
+	/* Keep a second pin for the dual retirement along engine and ring */
+	__intel_context_pin(ce);
 
+	intel_context_mark_active(ce);
 	return rq;
 
 err_unwind:
@@ -778,12 +736,86 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
-	intel_context_exit(ce);
-	mutex_unlock(&ce->ring->timeline->mutex);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
+struct i915_request *
+i915_request_create(struct intel_context *ce)
+{
+	struct i915_request *rq;
+
+	intel_context_timeline_lock(ce);
+
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
+	    i915_request_completed(rq))
+		i915_request_retire(rq);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_KERNEL);
+	intel_context_exit(ce); /* active reference transferred to request */
+	if (IS_ERR(rq))
+		goto err_unlock;
+
+	/* Check that we do not interrupt ourselves with a new request */
+	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
+
+	return rq;
+
+err_unlock:
+	intel_context_timeline_unlock(ce);
+	return rq;
+}
+
+/**
+ * i915_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct i915_request *
+i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_context *ce;
+	struct i915_request *rq;
+	int ret;
+
+	/*
+	 * Preempt contexts are reserved for exclusive use to inject a
+	 * preemption context switch. They are never to be used for any trivial
+	 * request!
+	 */
+	GEM_BUG_ON(ctx == i915->preempt_context);
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = intel_context_pin(ctx, engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = i915_request_create(ce);
+	intel_context_unpin(ce);
+
+	return rq;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -1044,8 +1076,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &rq->i915->drm.struct_mutex);
+	prev = rcu_dereference_protected(timeline->last_request.request, 1);
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1066,6 +1097,11 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	list_add_tail(&rq->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
+	/*
+	 * Make sure that no request gazumped us - if it was allocated after
+	 * our i915_request_alloc() and called __i915_request_add() before
+	 * us, the timeline will hold its seqno which is later than ours.
+	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
 	__i915_active_request_set(&timeline->last_request, rq);
 
@@ -1077,36 +1113,23 @@ __i915_request_add_to_timeline(struct i915_request *rq)
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void i915_request_add(struct i915_request *request)
+struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_timeline *timeline = request->timeline;
-	struct intel_ring *ring = request->ring;
+	struct intel_engine_cs *engine = rq->engine;
+	struct intel_ring *ring = rq->ring;
 	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
-		  engine->name, request->fence.context, request->fence.seqno);
-
-	lockdep_assert_held(&request->timeline->mutex);
-	lockdep_unpin_lock(&request->timeline->mutex, request->cookie);
-
-	trace_i915_request_add(request);
-
-	/*
-	 * Make sure that no request gazumped us - if it was allocated after
-	 * our i915_request_alloc() and called __i915_request_add() before
-	 * us, the timeline will hold its seqno which is later than ours.
-	 */
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
+		  engine->name, rq->fence.context, rq->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
-	GEM_BUG_ON(request->reserved_space > request->ring->space);
-	request->reserved_space = 0;
+	GEM_BUG_ON(rq->reserved_space > ring->space);
+	rq->reserved_space = 0;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1114,17 +1137,17 @@ void i915_request_add(struct i915_request *request)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
 	 */
-	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
+	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
 	GEM_BUG_ON(IS_ERR(cs));
-	request->postfix = intel_ring_offset(request, cs);
+	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(request);
+	prev = __i915_request_add_to_timeline(rq);
 
-	list_add_tail(&request->ring_link, &ring->request_list);
-	if (list_is_first(&request->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &request->i915->gt.active_rings);
-	request->i915->gt.active_engines |= request->engine->mask;
-	request->emitted_jiffies = jiffies;
+	list_add_tail(&rq->ring_link, &ring->request_list);
+	if (list_is_first(&rq->ring_link, &ring->request_list))
+		list_add(&ring->active_link, &rq->i915->gt.active_rings);
+	rq->i915->gt.active_engines |= rq->engine->mask;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Let the backend know a new request has arrived that may need
@@ -1138,10 +1161,10 @@ void i915_request_add(struct i915_request *request)
 	 * run at the earliest possible convenience.
 	 */
 	local_bh_disable();
-	i915_sw_fence_commit(&request->semaphore);
+	i915_sw_fence_commit(&rq->semaphore);
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_sched_attr attr = rq->gem_context->sched;
 
 		/*
 		 * Boost actual workloads past semaphores!
@@ -1155,7 +1178,7 @@ void i915_request_add(struct i915_request *request)
 		 * far in the distance past over useful work, we keep a history
 		 * of any semaphore use along our dependency chain.
 		 */
-		if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
 			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
 
 		/*
@@ -1164,15 +1187,29 @@ void i915_request_add(struct i915_request *request)
 		 * Allow interactive/synchronous clients to jump ahead of
 		 * the bulk clients. (FQ_CODEL)
 		 */
-		if (list_empty(&request->sched.signalers_list))
+		if (list_empty(&rq->sched.signalers_list))
 			attr.priority |= I915_PRIORITY_NEWCLIENT;
 
-		engine->schedule(request, &attr);
+		engine->schedule(rq, &attr);
 	}
 	rcu_read_unlock();
-	i915_sw_fence_commit(&request->submit);
+	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
+	return prev;
+}
+
+void i915_request_add(struct i915_request *rq)
+{
+	struct i915_request *prev;
+
+	lockdep_assert_held(&rq->timeline->mutex);
+	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+
+	trace_i915_request_add(rq);
+
+	prev = __i915_request_commit(rq);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
@@ -1193,7 +1230,7 @@ void i915_request_add(struct i915_request *request)
 	if (prev && i915_request_completed(prev))
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&request->timeline->mutex);
+	mutex_unlock(&rq->timeline->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a982664618c2..36f13b74ec58 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -239,6 +239,13 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
 	return fence->ops == &i915_fence_ops;
 }
 
+struct i915_request * __must_check
+__i915_request_create(struct intel_context *ce, gfp_t gfp);
+struct i915_request * __must_check
+i915_request_create(struct intel_context *ce);
+
+struct i915_request *__i915_request_commit(struct i915_request *request);
+
 struct i915_request * __must_check
 i915_request_alloc(struct intel_engine_cs *engine,
 		   struct i915_gem_context *ctx);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index eb317759b5d3..5c496b11ab5c 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -235,10 +235,9 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 
 static struct i915_request *alloc_request(struct intel_overlay *overlay)
 {
-	struct drm_i915_private *dev_priv = overlay->i915;
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
+	struct intel_engine_cs *engine = overlay->i915->engine[RCS0];
 
-	return i915_request_alloc(engine, dev_priv->kernel_context);
+	return i915_request_create(engine->kernel_context);
 }
 
 /* overlay needs to be disable in OCMD reg */
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 27d8f853111b..eee838dc0634 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -46,7 +46,7 @@ static int __live_active_setup(struct drm_i915_private *i915,
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index e43630b40fce..046a38743152 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -202,7 +202,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 12fc53c694a6..12203d665a4e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -468,7 +468,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 	if (err)
 		return err;
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index e6ffe2240126..098d7b3aa131 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -551,8 +551,7 @@ static int live_nop_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
-				request = i915_request_alloc(engine,
-							     i915->kernel_context);
+				request = i915_request_create(engine->kernel_context);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
 					goto out_unlock;
@@ -649,7 +648,7 @@ empty_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 	int err;
 
-	request = i915_request_alloc(engine, engine->i915->kernel_context);
+	request = i915_request_create(engine->kernel_context);
 	if (IS_ERR(request))
 		return request;
 
@@ -853,7 +852,7 @@ static int live_all_engines(void *arg)
 	}
 
 	for_each_engine(engine, i915, id) {
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed with err=%d\n",
@@ -962,7 +961,7 @@ static int live_sequential_engines(void *arg)
 			goto out_unlock;
 		}
 
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed for %s with err=%d\n",
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index bd96afcadfe7..ff9ebe50fae8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -454,7 +454,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
 		goto out;
 	}
 
-	rq = i915_request_alloc(engine, engine->i915->kernel_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq))
 		goto out_unpin;
 
@@ -678,7 +678,7 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
-- 
2.20.1


* [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (9 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 11/32] drm/i915: Pass intel_context to i915_request_create() Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-18 12:42   ` Tvrtko Ursulin
  2019-04-23 13:02   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 13/32] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
                   ` (23 subsequent siblings)
  34 siblings, 2 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

In the current scheme, on submitting a request we take a single global
GEM wakeref, which trickles down to wake up all GT power domains. This
is undesirable as we would like to be able to localise our power
management to the available power domains and to remove the global GEM
operations from the heart of the driver. (The intent there is to push
global GEM decisions to the boundary as used by the GEM user interface.)

Now during request construction, each request is responsible, via its
logical context, for acquiring a wakeref on each power domain it intends
to utilize. Currently, each request takes a wakeref on the engine(s) and
the engines themselves take a chipset wakeref. This gives us a
transition on each engine which we can extend if we want to insert more
power management control (such as soft rc6). The global GEM operations
that currently require a struct_mutex are reduced to listening to pm
events from the chipset GT wakeref. As we reduce the struct_mutex
requirement, these listeners should evaporate.

Perhaps the biggest immediate change is that this removes the
struct_mutex requirement around GT power management, allowing us greater
flexibility in request construction. Another important knock-on effect
is that by tracking engine usage, we can insert a switch back to the
kernel context on that engine immediately, avoiding any extra delay and
the need to insert global synchronisation barriers. This makes tracking
when an engine and its associated contexts are idle much easier --
important for when we forgo our assumed execution ordering and need idle
barriers to unpin used contexts. In the process, it means we remove a
large chunk of code whose only purpose was to switch back to the kernel
context.
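
The resulting hierarchy can be modelled as a chain of refcounted nodes,
where the first user of a level wakes its parent and the last user lets
it idle again (purely illustrative; struct pm_node does not exist in the
driver):

	struct pm_node {
		struct pm_node *parent;
		unsigned int count;
	};

	static void pm_node_get(struct pm_node *n)
	{
		if (!n->count++ && n->parent)
			pm_node_get(n->parent);	/* first user wakes parent */
	}

	static void pm_node_put(struct pm_node *n)
	{
		if (!--n->count && n->parent)
			pm_node_put(n->parent);	/* last user idles parent */
	}

Requests sit at the bottom of the chain: each request keeps its engine
awake, the first busy engine keeps the GT awake, and the GT wakeref in
turn maps onto the chipset runtime pm.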

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   2 +
 drivers/gpu/drm/i915/gt/intel_context.c       |  18 +-
 drivers/gpu/drm/i915/gt/intel_engine.h        |   9 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 142 +---------
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 153 ++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_pm.h     |  20 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +-
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         | 143 ++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_pm.h         |  27 ++
 drivers/gpu/drm/i915/gt/intel_hangcheck.c     |   7 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   6 +-
 drivers/gpu/drm/i915/gt/intel_reset.c         | 101 +------
 drivers/gpu/drm/i915/gt/intel_reset.h         |   1 -
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  16 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  49 +---
 .../gpu/drm/i915/gt/selftest_workarounds.c    |   5 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  16 +-
 drivers/gpu/drm/i915/i915_drv.c               |   5 +-
 drivers/gpu/drm/i915/i915_drv.h               |   8 +-
 drivers/gpu/drm/i915/i915_gem.c               |  41 +--
 drivers/gpu/drm/i915/i915_gem.h               |   3 -
 drivers/gpu/drm/i915/i915_gem_context.c       |  85 +-----
 drivers/gpu/drm/i915/i915_gem_context.h       |   4 -
 drivers/gpu/drm/i915/i915_gem_evict.c         |  47 +---
 drivers/gpu/drm/i915/i915_gem_pm.c            | 264 ++++++------------
 drivers/gpu/drm/i915/i915_gem_pm.h            |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.h         |   4 -
 drivers/gpu/drm/i915/i915_request.c           |  10 +-
 drivers/gpu/drm/i915/i915_request.h           |   2 +-
 drivers/gpu/drm/i915/intel_uc.c               |  22 +-
 drivers/gpu/drm/i915/intel_uc.h               |   2 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  16 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 114 +-------
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  29 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  32 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  15 +-
 37 files changed, 598 insertions(+), 833 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.h
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 858642c7bc40..dd8d923aa1c6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -71,6 +71,8 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
+	gt/intel_engine_pm.o \
+	gt/intel_gt_pm.o \
 	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_reset.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 4410e20e8e13..298e463ad082 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -10,6 +10,7 @@
 
 #include "intel_context.h"
 #include "intel_engine.h"
+#include "intel_engine_pm.h"
 
 static struct i915_global_context {
 	struct i915_global base;
@@ -162,7 +163,11 @@ intel_context_pin(struct i915_gem_context *ctx,
 		return ERR_PTR(-EINTR);
 
 	if (likely(!atomic_read(&ce->pin_count))) {
-		err = ce->ops->pin(ce);
+		intel_wakeref_t wakeref;
+
+		err = 0;
+		with_intel_runtime_pm(ce->engine->i915, wakeref)
+			err = ce->ops->pin(ce);
 		if (err)
 			goto err;
 
@@ -269,17 +274,10 @@ int __init i915_global_context_init(void)
 
 void intel_context_enter_engine(struct intel_context *ce)
 {
-	struct drm_i915_private *i915 = ce->gem_context->i915;
-
-	if (!i915->gt.active_requests++)
-		i915_gem_unpark(i915);
+	intel_engine_pm_get(ce->engine);
 }
 
 void intel_context_exit_engine(struct intel_context *ce)
 {
-	struct drm_i915_private *i915 = ce->gem_context->i915;
-
-	GEM_BUG_ON(!i915->gt.active_requests);
-	if (!--i915->gt.active_requests)
-		i915_gem_park(i915);
+	intel_engine_pm_put(ce->engine);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 72c7c337ace9..a228dc1774d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -382,6 +382,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
+void intel_engine_init_execlists(struct intel_engine_cs *engine);
+
 void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -458,19 +460,14 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
 {
 	if (engine->reset.reset)
 		engine->reset.reset(engine, stalled);
+	engine->serial++; /* contexts lost */
 }
 
-void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
-void intel_gt_resume(struct drm_i915_private *i915);
-
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
 void intel_engine_lost_context(struct intel_engine_cs *engine);
 
-void intel_engines_park(struct drm_i915_private *i915);
-void intel_engines_unpark(struct drm_i915_private *i915);
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 21dd3f25e641..268dfb8e16ff 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -27,6 +27,7 @@
 #include "i915_drv.h"
 
 #include "intel_engine.h"
+#include "intel_engine_pm.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 
@@ -451,7 +452,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static void intel_engine_init_execlist(struct intel_engine_cs *engine)
+void intel_engine_init_execlists(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
@@ -584,10 +585,11 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
 
 	intel_engine_init_breadcrumbs(engine);
-	intel_engine_init_execlist(engine);
+	intel_engine_init_execlists(engine);
 	intel_engine_init_hangcheck(engine);
 	intel_engine_init_batch_pool(engine);
 	intel_engine_init_cmd_parser(engine);
+	intel_engine_init__pm(engine);
 
 	/* Use the whole device by default */
 	engine->sseu =
@@ -758,30 +760,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	return ret;
 }
 
-void intel_gt_resume(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/*
-	 * After resume, we may need to poke into the pinned kernel
-	 * contexts to paper over any damage caused by the sudden suspend.
-	 * Only the kernel contexts should remain pinned over suspend,
-	 * allowing us to fixup the user contexts on their first pin.
-	 */
-	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-
-		ce = engine->kernel_context;
-		if (ce)
-			ce->ops->reset(ce);
-
-		ce = engine->preempt_context;
-		if (ce)
-			ce->ops->reset(ce);
-	}
-}
-
 /**
  * intel_engines_cleanup_common - cleans up the engine state created by
  *                                the common initiailizers.
@@ -1128,117 +1106,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
-static bool reset_engines(struct drm_i915_private *i915)
-{
-	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
-		return false;
-
-	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
-}
-
-/**
- * intel_engines_sanitize: called after the GPU has lost power
- * @i915: the i915 device
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_engines_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	GEM_TRACE("\n");
-
-	if (!reset_engines(i915) && !force)
-		return;
-
-	for_each_engine(engine, i915, id)
-		intel_engine_reset(engine, false);
-}
-
-/**
- * intel_engines_park: called when the GT is transitioning from busy->idle
- * @i915: the i915 device
- *
- * The GT is now idle and about to go to sleep (maybe never to wake again?).
- * Time for us to tidy and put away our toys (release resources back to the
- * system).
- */
-void intel_engines_park(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, i915, id) {
-		/* Flush the residual irq tasklets first. */
-		intel_engine_disarm_breadcrumbs(engine);
-		tasklet_kill(&engine->execlists.tasklet);
-
-		/*
-		 * We are committed now to parking the engines, make sure there
-		 * will be no more interrupts arriving later and the engines
-		 * are truly idle.
-		 */
-		if (wait_for(intel_engine_is_idle(engine), 10)) {
-			struct drm_printer p = drm_debug_printer(__func__);
-
-			dev_err(i915->drm.dev,
-				"%s is not idle before parking\n",
-				engine->name);
-			intel_engine_dump(engine, &p, NULL);
-		}
-
-		/* Must be reset upon idling, or we may miss the busy wakeup. */
-		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
-
-		if (engine->park)
-			engine->park(engine);
-
-		if (engine->pinned_default_state) {
-			i915_gem_object_unpin_map(engine->default_state);
-			engine->pinned_default_state = NULL;
-		}
-
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-		engine->execlists.no_priolist = false;
-	}
-
-	i915->gt.active_engines = 0;
-}
-
-/**
- * intel_engines_unpark: called when the GT is transitioning from idle->busy
- * @i915: the i915 device
- *
- * The GT was idle and now about to fire up with some new user requests.
- */
-void intel_engines_unpark(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, i915, id) {
-		void *map;
-
-		/* Pin the default state for fast resets from atomic context. */
-		map = NULL;
-		if (engine->default_state)
-			map = i915_gem_object_pin_map(engine->default_state,
-						      I915_MAP_WB);
-		if (!IS_ERR_OR_NULL(map))
-			engine->pinned_default_state = map;
-
-		if (engine->unpark)
-			engine->unpark(engine);
-
-		intel_engine_init_hangcheck(engine);
-	}
-}
-
 /**
  * intel_engine_lost_context: called when the GPU is reset into unknown state
  * @engine: the engine
@@ -1523,6 +1390,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	if (i915_reset_failed(engine->i915))
 		drm_printf(m, "*** WEDGED ***\n");
 
+	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
 	drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
 		   engine->hangcheck.last_seqno,
 		   engine->hangcheck.next_seqno,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
new file mode 100644
index 000000000000..cc0adfa14947
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -0,0 +1,153 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+#include "intel_engine.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_pm.h"
+
+static int intel_engine_unpark(struct intel_wakeref *wf)
+{
+	struct intel_engine_cs *engine =
+		container_of(wf, typeof(*engine), wakeref);
+	void *map;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	intel_gt_pm_get(engine->i915);
+
+	/* Pin the default state for fast resets from atomic context. */
+	map = NULL;
+	if (engine->default_state)
+		map = i915_gem_object_pin_map(engine->default_state,
+					      I915_MAP_WB);
+	if (!IS_ERR_OR_NULL(map))
+		engine->pinned_default_state = map;
+
+	if (engine->unpark)
+		engine->unpark(engine);
+
+	intel_engine_init_hangcheck(engine);
+	return 0;
+}
+
+void intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+	intel_wakeref_get(engine->i915, &engine->wakeref, intel_engine_unpark);
+}
+
+static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	/* Already inside the kernel context, safe to power down. */
+	if (engine->wakeref_serial == engine->serial)
+		return true;
+
+	/* GPU is pointing to the void, as good as in the kernel context. */
+	if (i915_reset_failed(engine->i915))
+		return true;
+
+	/*
+	 * Note, we do this without taking the timeline->mutex. We cannot
+	 * as we may be called while retiring the kernel context and so
+	 * already underneath the timeline->mutex. Instead we rely on the
+	 * exclusive property of the intel_engine_park that prevents anyone
+	 * else from creating a request on this engine. This also requires
+	 * that the ring is empty and we avoid any waits while constructing
+	 * the context, as they assume protection by the timeline->mutex.
+	 * This should hold true as we can only park the engine after
+	 * retiring the last request, thus all rings should be empty and
+	 * all timelines idle.
+	 */
+	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+	if (IS_ERR(rq))
+		/* Context switch failed, hope for the best! Maybe reset? */
+		return true;
+
+	/* Check again on the next retirement. */
+	engine->wakeref_serial = engine->serial + 1;
+	__i915_request_commit(rq);
+
+	return false;
+}
+
+static int intel_engine_park(struct intel_wakeref *wf)
+{
+	struct intel_engine_cs *engine =
+		container_of(wf, typeof(*engine), wakeref);
+
+	/*
+	 * If one and only one request is completed between pm events,
+	 * we know that we are inside the kernel context and it is
+	 * safe to power down. (We are paranoid in case that runtime
+	 * suspend causes corruption to the active context image, and
+	 * want to avoid that impacting userspace.)
+	 */
+	if (!switch_to_kernel_context(engine))
+		return -EBUSY;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	intel_engine_disarm_breadcrumbs(engine);
+
+	/* Must be reset upon idling, or we may miss the busy wakeup. */
+	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
+
+	if (engine->park)
+		engine->park(engine);
+
+	if (engine->pinned_default_state) {
+		i915_gem_object_unpin_map(engine->default_state);
+		engine->pinned_default_state = NULL;
+	}
+
+	engine->execlists.no_priolist = false;
+
+	intel_gt_pm_put(engine->i915);
+	return 0;
+}
+
+void intel_engine_pm_put(struct intel_engine_cs *engine)
+{
+	intel_wakeref_put(engine->i915, &engine->wakeref, intel_engine_park);
+}
+
+void intel_engine_init__pm(struct intel_engine_cs *engine)
+{
+	intel_wakeref_init(&engine->wakeref);
+}
+
+int intel_engines_resume(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * After resume, we may need to poke into the pinned kernel
+	 * contexts to paper over any damage caused by the sudden suspend.
+	 * Only the kernel contexts should remain pinned over suspend,
+	 * allowing us to fixup the user contexts on their first pin.
+	 */
+	intel_gt_pm_get(i915);
+	for_each_engine(engine, i915, id) {
+		intel_engine_pm_get(engine);
+		engine->serial++; /* kernel context lost */
+		err = engine->resume(engine);
+		intel_engine_pm_put(engine);
+		if (err) {
+			dev_err(i915->drm.dev,
+				"Failed to restart %s (%d)\n",
+				engine->name, err);
+			break;
+		}
+	}
+	intel_gt_pm_put(i915);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
new file mode 100644
index 000000000000..143ac90ba117
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_PM_H
+#define INTEL_ENGINE_PM_H
+
+struct drm_i915_private;
+struct intel_engine_cs;
+
+void intel_engine_pm_get(struct intel_engine_cs *engine);
+void intel_engine_pm_put(struct intel_engine_cs *engine);
+
+void intel_engine_init__pm(struct intel_engine_cs *engine);
+
+int intel_engines_resume(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 3adf58da6d2c..d972c339309c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -20,6 +20,7 @@
 #include "i915_selftest.h"
 #include "i915_timeline_types.h"
 #include "intel_sseu.h"
+#include "intel_wakeref.h"
 #include "intel_workarounds_types.h"
 
 #define I915_MAX_SLICES	3
@@ -287,6 +288,10 @@ struct intel_engine_cs {
 	struct intel_context *kernel_context; /* pinned */
 	struct intel_context *preempt_context; /* pinned; optional */
 
+	unsigned long serial;
+
+	unsigned long wakeref_serial;
+	struct intel_wakeref wakeref;
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
 
@@ -359,7 +364,7 @@ struct intel_engine_cs {
 	void		(*irq_enable)(struct intel_engine_cs *engine);
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
-	int		(*init_hw)(struct intel_engine_cs *engine);
+	int		(*resume)(struct intel_engine_cs *engine);
 
 	struct {
 		void (*prepare)(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
new file mode 100644
index 000000000000..ae7155f0e063
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -0,0 +1,143 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt_pm.h"
+#include "intel_pm.h"
+#include "intel_wakeref.h"
+
+static void pm_notify(struct drm_i915_private *i915, int state)
+{
+	blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
+}
+
+static int intel_gt_unpark(struct intel_wakeref *wf)
+{
+	struct drm_i915_private *i915 =
+		container_of(wf, typeof(*i915), gt.wakeref);
+
+	GEM_TRACE("\n");
+
+	/*
+	 * It seems that the DMC likes to transition between the DC states a lot
+	 * when there are no connected displays (no active power domains) during
+	 * command submission.
+	 *
+	 * This activity has negative impact on the performance of the chip with
+	 * huge latencies observed in the interrupt handler and elsewhere.
+	 *
+	 * Work around it by grabbing a GT IRQ power domain whilst there is any
+	 * GT activity, preventing any DC state transitions.
+	 */
+	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	intel_enable_gt_powersave(i915);
+
+	i915_update_gfx_val(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_busy(i915);
+
+	i915_pmu_gt_unparked(i915);
+
+	i915_queue_hangcheck(i915);
+
+	pm_notify(i915, INTEL_GT_UNPARK);
+
+	return 0;
+}
+
+void intel_gt_pm_get(struct drm_i915_private *i915)
+{
+	intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark);
+}
+
+static int intel_gt_park(struct intel_wakeref *wf)
+{
+	struct drm_i915_private *i915 =
+		container_of(wf, typeof(*i915), gt.wakeref);
+	intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
+
+	GEM_TRACE("\n");
+
+	pm_notify(i915, INTEL_GT_PARK);
+
+	i915_pmu_gt_parked(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_idle(i915);
+
+	GEM_BUG_ON(!wakeref);
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+
+	return 0;
+}
+
+void intel_gt_pm_put(struct drm_i915_private *i915)
+{
+	intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park);
+}
+
+void intel_gt_pm_init(struct drm_i915_private *i915)
+{
+	intel_wakeref_init(&i915->gt.wakeref);
+	BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications);
+}
+
+static bool reset_engines(struct drm_i915_private *i915)
+{
+	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
+		return false;
+
+	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
+}
+
+/**
+ * intel_gt_sanitize: called after the GPU has lost power
+ * @i915: the i915 device
+ * @force: ignore a failed reset and sanitize engine state anyway
+ *
+ * Anytime we reset the GPU, either with an explicit GPU reset or through a
+ * PCI power cycle, the GPU loses state and we must reset our state tracking
+ * to match. Note that calling intel_gt_sanitize() if the GPU has not
+ * been reset results in much confusion!
+ */
+void intel_gt_sanitize(struct drm_i915_private *i915, bool force)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	GEM_TRACE("\n");
+
+	if (!reset_engines(i915) && !force)
+		return;
+
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, false);
+}
+
+void intel_gt_resume(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * After resume, we may need to poke into the pinned kernel
+	 * contexts to paper over any damage caused by the sudden suspend.
+	 * Only the kernel contexts should remain pinned over suspend,
+	 * allowing us to fixup the user contexts on their first pin.
+	 */
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
+
+		ce = engine->kernel_context;
+		if (ce)
+			ce->ops->reset(ce);
+
+		ce = engine->preempt_context;
+		if (ce)
+			ce->ops->reset(ce);
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
new file mode 100644
index 000000000000..7dd1130a19a4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_PM_H
+#define INTEL_GT_PM_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+
+enum {
+	INTEL_GT_UNPARK,
+	INTEL_GT_PARK,
+};
+
+void intel_gt_pm_get(struct drm_i915_private *i915);
+void intel_gt_pm_put(struct drm_i915_private *i915);
+
+void intel_gt_pm_init(struct drm_i915_private *i915);
+
+void intel_gt_sanitize(struct drm_i915_private *i915, bool force);
+void intel_gt_resume(struct drm_i915_private *i915);
+
+#endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index 3053a706a561..e5eaa06fe74d 100644
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -256,6 +256,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	unsigned int hung = 0, stuck = 0, wedged = 0;
+	intel_wakeref_t wakeref;
 
 	if (!i915_modparams.enable_hangcheck)
 		return;
@@ -266,6 +267,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	if (i915_terminally_wedged(dev_priv))
 		return;
 
+	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	if (!wakeref)
+		return;
+
 	/* As enabling the GPU requires fairly extensive mmio access,
 	 * periodically arm the mmio checker to see if we are triggering
 	 * any invalid access.
@@ -313,6 +318,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	if (hung)
 		hangcheck_declare_hang(dev_priv, hung, stuck);
 
+	intel_runtime_pm_put(dev_priv, wakeref);
+
 	/* Reset timer in case GPU hangs without another request being added */
 	i915_queue_hangcheck(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index edec7f183688..d17c08e26935 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1789,7 +1789,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
 	return unexpected;
 }
 
-static int gen8_init_common_ring(struct intel_engine_cs *engine)
+static int execlists_resume(struct intel_engine_cs *engine)
 {
 	intel_engine_apply_workarounds(engine);
 	intel_engine_apply_whitelist(engine);
@@ -1822,7 +1822,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * completed the reset in i915_gem_reset_finish(). If a request
 	 * is completed by one engine, it may then queue a request
 	 * to a second via its execlists->tasklet *just* as we are
-	 * calling engine->init_hw() and also writing the ELSP.
+	 * calling engine->resume() and also writing the ELSP.
 	 * Turning off the execlists->tasklet until the reset is over
 	 * prevents the race.
 	 */
@@ -2391,7 +2391,7 @@ static void
 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
-	engine->init_hw = gen8_init_common_ring;
+	engine->resume = execlists_resume;
 
 	engine->reset.prepare = execlists_reset_prepare;
 	engine->reset.reset = execlists_reset;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 996164d07397..af85723c7e2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -9,6 +9,8 @@
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_pm.h"
 #include "intel_reset.h"
 
 #include "intel_guc.h"
@@ -683,6 +685,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
 	 * written to the powercontext is undefined and so we may lose
 	 * GPU state upon resume, i.e. fail to restart after a reset.
 	 */
+	intel_engine_pm_get(engine);
 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
 	engine->reset.prepare(engine);
 }
@@ -718,6 +721,7 @@ static void reset_prepare(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	intel_gt_pm_get(i915);
 	for_each_engine(engine, i915, id)
 		reset_prepare_engine(engine);
 
@@ -755,48 +759,10 @@ static int gt_reset(struct drm_i915_private *i915,
 static void reset_finish_engine(struct intel_engine_cs *engine)
 {
 	engine->reset.finish(engine);
+	intel_engine_pm_put(engine);
 	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
 }
 
-struct i915_gpu_restart {
-	struct work_struct work;
-	struct drm_i915_private *i915;
-};
-
-static void restart_work(struct work_struct *work)
-{
-	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
-	struct drm_i915_private *i915 = arg->i915;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-
-	wakeref = intel_runtime_pm_get(i915);
-	mutex_lock(&i915->drm.struct_mutex);
-	WRITE_ONCE(i915->gpu_error.restart, NULL);
-
-	for_each_engine(engine, i915, id) {
-		struct i915_request *rq;
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 */
-		if (!intel_engine_is_idle(engine))
-			continue;
-
-		rq = i915_request_create(engine->kernel_context);
-		if (!IS_ERR(rq))
-			i915_request_add(rq);
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
-
-	kfree(arg);
-}
-
 static void reset_finish(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -806,29 +772,7 @@ static void reset_finish(struct drm_i915_private *i915)
 		reset_finish_engine(engine);
 		intel_engine_signal_breadcrumbs(engine);
 	}
-}
-
-static void reset_restart(struct drm_i915_private *i915)
-{
-	struct i915_gpu_restart *arg;
-
-	/*
-	 * Following the reset, ensure that we always reload context for
-	 * powersaving, and to correct engine->last_retired_context. Since
-	 * this requires us to submit a request, queue a worker to do that
-	 * task for us to evade any locking here.
-	 */
-	if (READ_ONCE(i915->gpu_error.restart))
-		return;
-
-	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
-	if (arg) {
-		arg->i915 = i915;
-		INIT_WORK(&arg->work, restart_work);
-
-		WRITE_ONCE(i915->gpu_error.restart, arg);
-		queue_work(i915->wq, &arg->work);
-	}
+	intel_gt_pm_put(i915);
 }
 
 static void nop_submit_request(struct i915_request *request)
@@ -889,6 +833,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 	 * in nop_submit_request.
 	 */
 	synchronize_rcu_expedited();
+	set_bit(I915_WEDGED, &error->flags);
 
 	/* Mark all executing requests as skipped */
 	for_each_engine(engine, i915, id)
@@ -896,9 +841,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 
 	reset_finish(i915);
 
-	smp_mb__before_atomic();
-	set_bit(I915_WEDGED, &error->flags);
-
 	GEM_TRACE("end\n");
 }
 
@@ -956,7 +898,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 	}
 	mutex_unlock(&i915->gt.timelines.mutex);
 
-	intel_engines_sanitize(i915, false);
+	intel_gt_sanitize(i915, false);
 
 	/*
 	 * Undo nop_submit_request. We prevent all new i915 requests from
@@ -1034,7 +976,6 @@ void i915_reset(struct drm_i915_private *i915,
 	GEM_TRACE("flags=%lx\n", error->flags);
 
 	might_sleep();
-	assert_rpm_wakelock_held(i915);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
@@ -1087,8 +1028,6 @@ void i915_reset(struct drm_i915_private *i915,
 
 finish:
 	reset_finish(i915);
-	if (!__i915_wedged(error))
-		reset_restart(i915);
 	return;
 
 taint:
@@ -1137,6 +1076,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
+	if (!intel_wakeref_active(&engine->wakeref))
+		return 0;
+
 	reset_prepare_engine(engine);
 
 	if (msg)
@@ -1168,7 +1110,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	 * have been reset to their default values. Follow the init_ring
 	 * process to program RING_MODE, HWSP and re-enable submission.
 	 */
-	ret = engine->init_hw(engine);
+	ret = engine->resume(engine);
 	if (ret)
 		goto out;
 
@@ -1425,25 +1367,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915)
 	return __i915_wedged(error) ? -EIO : 0;
 }
 
-bool i915_reset_flush(struct drm_i915_private *i915)
-{
-	int err;
-
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
-	flush_workqueue(i915->wq);
-	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
-
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED |
-				     I915_WAIT_FOR_IDLE_BOOST,
-				     MAX_SCHEDULE_TIMEOUT);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return !err;
-}
-
 static void i915_wedge_me(struct work_struct *work)
 {
 	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 8e662bb43a9b..b52efaab4941 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine,
 		      const char *reason);
 
 void i915_reset_request(struct i915_request *rq, bool guilty);
-bool i915_reset_flush(struct drm_i915_private *i915);
 
 int __must_check i915_reset_trylock(struct drm_i915_private *i915);
 void i915_reset_unlock(struct drm_i915_private *i915, int tag);
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index b2bb7d4bfbe3..f164dbe90050 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -637,12 +637,15 @@ static bool stop_ring(struct intel_engine_cs *engine)
 	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
 }
 
-static int init_ring_common(struct intel_engine_cs *engine)
+static int xcs_resume(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_ring *ring = engine->buffer;
 	int ret = 0;
 
+	GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
+		  engine->name, ring->head, ring->tail);
+
 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
 
 	if (!stop_ring(engine)) {
@@ -827,12 +830,9 @@ static int intel_rcs_ctx_init(struct i915_request *rq)
 	return 0;
 }
 
-static int init_render_ring(struct intel_engine_cs *engine)
+static int rcs_resume(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	int ret = init_ring_common(engine);
-	if (ret)
-		return ret;
 
 	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
 	if (IS_GEN_RANGE(dev_priv, 4, 6))
@@ -875,7 +875,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) >= 6)
 		ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
 
-	return 0;
+	return xcs_resume(engine);
 }
 
 static void cancel_requests(struct intel_engine_cs *engine)
@@ -2207,7 +2207,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
 	intel_ring_init_irq(dev_priv, engine);
 
-	engine->init_hw = init_ring_common;
+	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
 	engine->reset.reset = reset_ring;
 	engine->reset.finish = reset_finish;
@@ -2269,7 +2269,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 	if (IS_HASWELL(dev_priv))
 		engine->emit_bb_start = hsw_emit_bb_start;
 
-	engine->init_hw = init_render_ring;
+	engine->resume = rcs_resume;
 
 	ret = intel_init_ring_buffer(engine);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index bcfeb0c67997..a97a0ab35703 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -24,6 +24,7 @@
 
 #include "i915_drv.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 
 #include "mock_engine.h"
 #include "selftests/mock_request.h"
@@ -268,6 +269,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
 
 	intel_engine_init_breadcrumbs(&engine->base);
+	intel_engine_init_execlists(&engine->base);
+	intel_engine_init__pm(&engine->base);
 
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 87c26920212f..6004d6907e9c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -24,6 +24,8 @@
 
 #include <linux/kthread.h>
 
+#include "intel_engine_pm.h"
+
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
@@ -479,19 +481,6 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		if (!i915_reset_flush(i915)) {
-			struct drm_printer p =
-				drm_info_printer(i915->drm.dev);
-
-			pr_err("%s failed to idle after reset\n",
-			       engine->name);
-			intel_engine_dump(engine, &p,
-					  "%s\n", engine->name);
-
-			err = -EIO;
-			break;
-		}
-
 		err = igt_flush_test(i915, 0);
 		if (err)
 			break;
@@ -594,19 +583,6 @@ static int igt_reset_nop_engine(void *arg)
 				err = -EINVAL;
 				break;
 			}
-
-			if (!i915_reset_flush(i915)) {
-				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
-
-				pr_err("%s failed to idle after reset\n",
-				       engine->name);
-				intel_engine_dump(engine, &p,
-						  "%s\n", engine->name);
-
-				err = -EIO;
-				break;
-			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -669,6 +645,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
 							     engine);
 
+		intel_engine_pm_get(engine);
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
 			if (active) {
@@ -721,21 +698,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 				err = -EINVAL;
 				break;
 			}
-
-			if (!i915_reset_flush(i915)) {
-				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
-
-				pr_err("%s failed to idle after reset\n",
-				       engine->name);
-				intel_engine_dump(engine, &p,
-						  "%s\n", engine->name);
-
-				err = -EIO;
-				break;
-			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		intel_engine_pm_put(engine);
 
 		if (err)
 			break;
@@ -942,6 +907,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 			get_task_struct(tsk);
 		}
 
+		intel_engine_pm_get(engine);
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
 			struct i915_request *rq = NULL;
@@ -1018,6 +984,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		intel_engine_pm_put(engine);
 		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
 			engine->name, test_name, count);
 
@@ -1069,7 +1036,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 		if (err)
 			break;
 
-		err = igt_flush_test(i915, 0);
+		mutex_lock(&i915->drm.struct_mutex);
+		err = igt_flush_test(i915, I915_WAIT_LOCKED);
+		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 96c6282f3a10..461d91737077 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -71,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 {
 	const u32 base = engine->mmio_base;
 	struct drm_i915_gem_object *result;
-	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	u32 srm, *cs;
@@ -103,9 +102,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
-		rq = i915_request_alloc(engine, ctx);
+	rq = i915_request_alloc(engine, ctx);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8dcba78fb43b..00d3ff746eb1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2041,8 +2041,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	}
 
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
-	seq_printf(m, "GPU busy? %s [%d requests]\n",
-		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
+	seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
 	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
@@ -2061,9 +2060,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 
 	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
 
-	if (INTEL_GEN(dev_priv) >= 6 &&
-	    rps->enabled &&
-	    dev_priv->gt.active_requests) {
+	if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) {
 		u32 rpup, rpupei;
 		u32 rpdown, rpdownei;
 
@@ -3092,9 +3089,9 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 	wakeref = intel_runtime_pm_get(dev_priv);
 
-	seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake));
-	seq_printf(m, "Global active requests: %d\n",
-		   dev_priv->gt.active_requests);
+	seq_printf(m, "GT awake? %s [%d]\n",
+		   yesno(dev_priv->gt.awake),
+		   atomic_read(&dev_priv->gt.wakeref.count));
 	seq_printf(m, "CS timestamp frequency: %u kHz\n",
 		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
 
@@ -3940,8 +3937,7 @@ i915_drop_caches_set(void *data, u64 val)
 
 	if (val & DROP_IDLE) {
 		do {
-			if (READ_ONCE(i915->gt.active_requests))
-				flush_delayed_work(&i915->gem.retire_work);
+			flush_delayed_work(&i915->gem.retire_work);
 			drain_delayed_work(&i915->gem.idle_work);
 		} while (READ_ONCE(i915->gt.awake));
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 98b997526daa..c8cb70d4fe91 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,8 +47,9 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
-#include "gt/intel_workarounds.h"
+#include "gt/intel_gt_pm.h"
 #include "gt/intel_reset.h"
+#include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
 #include "i915_pmu.h"
@@ -2323,7 +2324,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
 	intel_power_domains_resume(dev_priv);
 
-	intel_engines_sanitize(dev_priv, true);
+	intel_gt_sanitize(dev_priv, true);
 
 	enable_rpm_wakeref_asserts(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cbae9be052e0..e5ae6c36e959 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2006,10 +2006,10 @@ struct drm_i915_private {
 			struct list_head hwsp_free_list;
 		} timelines;
 
-		intel_engine_mask_t active_engines;
 		struct list_head active_rings;
 		struct list_head closed_vma;
-		u32 active_requests;
+
+		struct intel_wakeref wakeref;
 
 		/**
 		 * Is the GPU currently considered idle, or busy executing
@@ -2020,12 +2020,16 @@ struct drm_i915_private {
 		 */
 		intel_wakeref_t awake;
 
+		struct blocking_notifier_head pm_notifications;
+
 		ktime_t last_init_time;
 
 		struct i915_vma *scratch;
 	} gt;
 
 	struct {
+		struct notifier_block pm_notifier;
+
 		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 74b99126830b..d0211271f103 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,6 +39,8 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt_pm.h"
 #include "gt/intel_mocs.h"
 #include "gt/intel_reset.h"
 #include "gt/intel_workarounds.h"
@@ -2911,9 +2913,6 @@ wait_for_timelines(struct drm_i915_private *i915,
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
 	struct i915_timeline *tl;
 
-	if (!READ_ONCE(i915->gt.active_requests))
-		return timeout;
-
 	mutex_lock(&gt->mutex);
 	list_for_each_entry(tl, &gt->active_list, link) {
 		struct i915_request *rq;
@@ -2953,9 +2952,10 @@ wait_for_timelines(struct drm_i915_private *i915,
 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			   unsigned int flags, long timeout)
 {
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
+	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
 		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
+		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
+		  yesno(i915->gt.awake));
 
 	/* If the device is asleep, we have no requests outstanding */
 	if (!READ_ONCE(i915->gt.awake))
@@ -4177,7 +4177,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * it may impact the display and we are uncertain about the stability
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	intel_engines_sanitize(i915, false);
+	intel_gt_sanitize(i915, false);
 
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
@@ -4235,27 +4235,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int __i915_gem_restart_engines(void *data)
-{
-	struct drm_i915_private *i915 = data;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err;
-
-	for_each_engine(engine, i915, id) {
-		err = engine->init_hw(engine);
-		if (err) {
-			DRM_ERROR("Failed to restart %s (%d)\n",
-				  engine->name, err);
-			return err;
-		}
-	}
-
-	intel_engines_set_scheduler_caps(i915);
-
-	return 0;
-}
-
 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -4314,12 +4293,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 	intel_mocs_init_l3cc_table(dev_priv);
 
 	/* Only when the HW is re-initialised, can we replay the requests */
-	ret = __i915_gem_restart_engines(dev_priv);
+	ret = intel_engines_resume(dev_priv);
 	if (ret)
 		goto cleanup_uc;
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 
+	intel_engines_set_scheduler_caps(dev_priv);
 	return 0;
 
 cleanup_uc:
@@ -4625,6 +4605,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_init_hw:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_set_wedged(dev_priv);
 	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
@@ -4686,6 +4667,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 void i915_gem_fini(struct drm_i915_private *dev_priv)
 {
+	GEM_BUG_ON(dev_priv->gt.awake);
+
 	i915_gem_suspend_late(dev_priv);
 	intel_disable_gt_powersave(dev_priv);
 
@@ -4780,6 +4763,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
 	int err;
 
+	intel_gt_pm_init(dev_priv);
+
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 9074eb1e843f..67f8a4a807a0 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -75,9 +75,6 @@ struct drm_i915_private;
 
 #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
 
-void i915_gem_park(struct drm_i915_private *i915);
-void i915_gem_unpark(struct drm_i915_private *i915);
-
 static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 {
 	if (!atomic_fetch_inc(&t->count))
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3eb1a664b5fa..76ed74e75d82 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -824,26 +824,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static struct i915_request *
-last_request_on_engine(struct i915_timeline *timeline,
-		       struct intel_engine_cs *engine)
-{
-	struct i915_request *rq;
-
-	GEM_BUG_ON(timeline == &engine->timeline);
-
-	rq = i915_active_request_raw(&timeline->last_request,
-				     &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine->mask & engine->mask) {
-		GEM_TRACE("last request on engine %s: %llx:%llu\n",
-			  engine->name, rq->fence.context, rq->fence.seqno);
-		GEM_BUG_ON(rq->timeline != timeline);
-		return rq;
-	}
-
-	return NULL;
-}
-
 struct context_barrier_task {
 	struct i915_active base;
 	void (*task)(void *data);
@@ -871,7 +851,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct intel_context *ce, *next;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -884,7 +863,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_init(i915, &cb->base, cb_retire);
 	i915_active_acquire(&cb->base);
 
-	wakeref = intel_runtime_pm_get(i915);
 	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
 		struct intel_engine_cs *engine = ce->engine;
 		struct i915_request *rq;
@@ -914,7 +892,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (err)
 			break;
 	}
-	intel_runtime_pm_put(i915, wakeref);
 
 	cb->task = err ? NULL : task; /* caller needs to unwind instead */
 	cb->data = data;
@@ -924,54 +901,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	return err;
 }
 
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
-				      intel_engine_mask_t mask)
-{
-	struct intel_engine_cs *engine;
-
-	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915->kernel_context);
-
-	/* Inoperable, so presume the GPU is safely pointing into the void! */
-	if (i915_terminally_wedged(i915))
-		return 0;
-
-	for_each_engine_masked(engine, i915, mask, mask) {
-		struct intel_ring *ring;
-		struct i915_request *rq;
-
-		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq))
-			return PTR_ERR(rq);
-
-		/* Queue this switch after all other activity */
-		list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
-			struct i915_request *prev;
-
-			prev = last_request_on_engine(ring->timeline, engine);
-			if (!prev)
-				continue;
-
-			if (prev->gem_context == i915->kernel_context)
-				continue;
-
-			GEM_TRACE("add barrier on %s for %llx:%lld\n",
-				  engine->name,
-				  prev->fence.context,
-				  prev->fence.seqno);
-			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
-							 &prev->submit,
-							 I915_FENCE_GFP);
-		}
-
-		i915_request_add(rq);
-	}
-
-	return 0;
-}
-
 static int get_ppgtt(struct drm_i915_file_private *file_priv,
 		     struct i915_gem_context *ctx,
 		     struct drm_i915_gem_context_param *args)
@@ -1169,9 +1098,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 static int
 gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 {
-	struct drm_i915_private *i915 = ce->engine->i915;
 	struct i915_request *rq;
-	intel_wakeref_t wakeref;
 	int ret;
 
 	lockdep_assert_held(&ce->pin_mutex);
@@ -1185,14 +1112,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	if (!intel_context_is_pinned(ce))
 		return 0;
 
-	/* Submitting requests etc needs the hw awake. */
-	wakeref = intel_runtime_pm_get(i915);
-
 	rq = i915_request_create(ce->engine->kernel_context);
-	if (IS_ERR(rq)) {
-		ret = PTR_ERR(rq);
-		goto out_put;
-	}
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
 
 	/* Queue this switch after all other activity by this context. */
 	ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
@@ -1216,9 +1138,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 
 out_add:
 	i915_request_add(rq);
-out_put:
-	intel_runtime_pm_put(i915, wakeref);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index cec278ab04e2..5a8e080499fb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -141,10 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
 void i915_gem_context_close(struct drm_file *file);
 
-int i915_switch_context(struct i915_request *rq);
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
-				      intel_engine_mask_t engine_mask);
-
 void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 060f5903544a..0bdb3e072ba5 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -36,15 +36,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
-static bool ggtt_is_idle(struct drm_i915_private *i915)
-{
-	return !i915->gt.active_requests;
-}
-
 static int ggtt_flush(struct drm_i915_private *i915)
 {
-	int err;
-
 	/*
 	 * Not everything in the GGTT is tracked via vma (otherwise we
 	 * could evict as required with minimal stalling) so we are forced
@@ -52,19 +45,10 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
-	if (err)
-		return err;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		return err;
-
-	GEM_BUG_ON(!ggtt_is_idle(i915));
-	return 0;
+	return i915_gem_wait_for_idle(i915,
+				      I915_WAIT_INTERRUPTIBLE |
+				      I915_WAIT_LOCKED,
+				      MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -222,24 +206,17 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 * us a termination condition, when the last retired context is
 	 * the kernel's there is no more we can evict.
 	 */
-	if (!ggtt_is_idle(dev_priv)) {
-		if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
-			return -EBUSY;
+	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
+		return -EBUSY;
 
-		ret = ggtt_flush(dev_priv);
-		if (ret)
-			return ret;
+	ret = ggtt_flush(dev_priv);
+	if (ret)
+		return ret;
 
-		cond_resched();
-		goto search_again;
-	}
+	cond_resched();
 
-	/*
-	 * If we still have pending pageflip completions, drop
-	 * back to userspace to give our workqueues time to
-	 * acquire our locks and unpin the old scanouts.
-	 */
-	return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC;
+	flags |= PIN_NONBLOCK;
+	goto search_again;
 
 found:
 	/* drm_mm doesn't allow any other other operations while
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index 9fb0e8d567a2..3554d55dae35 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -4,136 +4,63 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gt/intel_gt_pm.h"
+
 #include "i915_drv.h"
 #include "i915_gem_pm.h"
 #include "i915_globals.h"
-#include "intel_pm.h"
 
-static void __i915_gem_park(struct drm_i915_private *i915)
+static void i915_gem_park(struct drm_i915_private *i915)
 {
-	intel_wakeref_t wakeref;
-
-	GEM_TRACE("\n");
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(i915->gt.active_requests);
-	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
-
-	if (!i915->gt.awake)
-		return;
-
-	/*
-	 * Be paranoid and flush a concurrent interrupt to make sure
-	 * we don't reactivate any irq tasklets after parking.
-	 *
-	 * FIXME: Note that even though we have waited for execlists to be idle,
-	 * there may still be an in-flight interrupt even though the CSB
-	 * is now empty. synchronize_irq() makes sure that a residual interrupt
-	 * is completed before we continue, but it doesn't prevent the HW from
-	 * raising a spurious interrupt later. To complete the shield we should
-	 * coordinate disabling the CS irq with flushing the interrupts.
-	 */
-	synchronize_irq(i915->drm.irq);
-
-	intel_engines_park(i915);
-	i915_timelines_park(i915);
-
-	i915_pmu_gt_parked(i915);
-	i915_vma_parked(i915);
-
-	wakeref = fetch_and_zero(&i915->gt.awake);
-	GEM_BUG_ON(!wakeref);
-
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_idle(i915);
-
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
-
-	i915_globals_park();
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
-					  unsigned long mask)
-{
-	bool result = true;
-
-	/*
-	 * Even if we fail to switch, give whatever is running a small chance
-	 * to save itself before we report the failure. Yes, this may be a
-	 * false positive due to e.g. ENOMEM, caveat emptor!
-	 */
-	if (i915_gem_switch_to_kernel_context(i915, mask))
-		result = false;
 
-	if (i915_gem_wait_for_idle(i915,
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT))
-		result = false;
+	for_each_engine(engine, i915, id) {
+		/*
+		 * We are committed now to parking the engines, make sure there
+		 * will be no more interrupts arriving later and the engines
+		 * are truly idle.
+		 */
+		if (wait_for(intel_engine_is_idle(engine), 10)) {
+			struct drm_printer p = drm_debug_printer(__func__);
 
-	if (!result) {
-		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
 			dev_err(i915->drm.dev,
-				"Failed to idle engines, declaring wedged!\n");
-			GEM_TRACE_DUMP();
+				"%s is not idle before parking\n",
+				engine->name);
+			intel_engine_dump(engine, &p, NULL);
 		}
+		tasklet_kill(&engine->execlists.tasklet);
 
-		/* Forcibly cancel outstanding work and leave the gpu quiet. */
-		i915_gem_set_wedged(i915);
+		i915_gem_batch_pool_fini(&engine->batch_pool);
 	}
 
-	i915_retire_requests(i915); /* ensure we flush after wedging */
-	return result;
+	i915_timelines_park(i915);
+	i915_vma_parked(i915);
+
+	i915_globals_park();
 }
 
 static void idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.idle_work.work);
-	bool rearm_hangcheck;
-
-	if (!READ_ONCE(i915->gt.awake))
-		return;
-
-	if (READ_ONCE(i915->gt.active_requests))
-		return;
-
-	rearm_hangcheck =
-		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
 	if (!mutex_trylock(&i915->drm.struct_mutex)) {
 		/* Currently busy, come back later */
 		mod_delayed_work(i915->wq,
 				 &i915->gem.idle_work,
 				 msecs_to_jiffies(50));
-		goto out_rearm;
+		return;
 	}
 
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. Should anything unfortunate happen
-	 * while we are idle (such as the GPU being power cycled), no users
-	 * will be harmed.
-	 */
-	if (!work_pending(&i915->gem.idle_work.work) &&
-	    !i915->gt.active_requests) {
-		++i915->gt.active_requests; /* don't requeue idle */
-
-		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
-
-		if (!--i915->gt.active_requests) {
-			__i915_gem_park(i915);
-			rearm_hangcheck = false;
-		}
-	}
+	intel_wakeref_lock(&i915->gt.wakeref);
+	if (!intel_wakeref_active(&i915->gt.wakeref))
+		i915_gem_park(i915);
+	intel_wakeref_unlock(&i915->gt.wakeref);
 
 	mutex_unlock(&i915->drm.struct_mutex);
-
-out_rearm:
-	if (rearm_hangcheck) {
-		GEM_BUG_ON(!i915->gt.awake);
-		i915_queue_hangcheck(i915);
-	}
 }
 
 static void retire_work_handler(struct work_struct *work)
@@ -147,97 +74,76 @@ static void retire_work_handler(struct work_struct *work)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	/*
-	 * Keep the retire handler running until we are finally idle.
-	 * We do not need to do this test under locking as in the worst-case
-	 * we queue the retire worker once too often.
-	 */
-	if (READ_ONCE(i915->gt.awake))
+	if (intel_wakeref_active(&i915->gt.wakeref))
 		queue_delayed_work(i915->wq,
 				   &i915->gem.retire_work,
 				   round_jiffies_up_relative(HZ));
 }
 
-void i915_gem_park(struct drm_i915_private *i915)
+static int pm_notifier(struct notifier_block *nb,
+		       unsigned long action,
+		       void *data)
 {
-	GEM_TRACE("\n");
+	struct drm_i915_private *i915 =
+		container_of(nb, typeof(*i915), gem.pm_notifier);
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(i915->gt.active_requests);
+	switch (action) {
+	case INTEL_GT_UNPARK:
+		i915_globals_unpark();
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+		break;
 
-	if (!i915->gt.awake)
-		return;
+	case INTEL_GT_PARK:
+		mod_delayed_work(i915->wq,
+				 &i915->gem.idle_work,
+				 msecs_to_jiffies(100));
+		break;
+	}
 
-	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
-	mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100));
+	return NOTIFY_OK;
 }
 
-void i915_gem_unpark(struct drm_i915_private *i915)
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
 {
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915->gt.active_requests);
-	assert_rpm_wakelock_held(i915);
-
-	if (i915->gt.awake)
-		return;
-
-	/*
-	 * It seems that the DMC likes to transition between the DC states a lot
-	 * when there are no connected displays (no active power domains) during
-	 * command submission.
-	 *
-	 * This activity has negative impact on the performance of the chip with
-	 * huge latencies observed in the interrupt handler and elsewhere.
-	 *
-	 * Work around it by grabbing a GT IRQ power domain whilst there is any
-	 * GT activity, preventing any DC state transitions.
-	 */
-	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
-	GEM_BUG_ON(!i915->gt.awake);
-
-	i915_globals_unpark();
-
-	intel_enable_gt_powersave(i915);
-	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
-	i915_pmu_gt_unparked(i915);
-
-	intel_engines_unpark(i915);
+	bool result = true;
 
-	i915_queue_hangcheck(i915);
+	do {
+		if (i915_gem_wait_for_idle(i915,
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_FOR_IDLE_BOOST,
+					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+			/* XXX hide warning from gem_eio */
+			if (i915_modparams.reset) {
+				dev_err(i915->drm.dev,
+					"Failed to idle engines, declaring wedged!\n");
+				GEM_TRACE_DUMP();
+			}
+
+			/*
+			 * Forcibly cancel outstanding work and leave
+			 * the gpu quiet.
+			 */
+			i915_gem_set_wedged(i915);
+			result = false;
+		}
+	} while (i915_retire_requests(i915) && result);
 
-	queue_delayed_work(i915->wq,
-			   &i915->gem.retire_work,
-			   round_jiffies_up_relative(HZ));
+	GEM_BUG_ON(i915->gt.awake);
+	return result;
 }
 
 bool i915_gem_load_power_context(struct drm_i915_private *i915)
 {
-	/* Force loading the kernel context on all engines */
-	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
-		return false;
-
-	/*
-	 * Immediately park the GPU so that we enable powersaving and
-	 * treat it as idle. The next time we issue a request, we will
-	 * unpark and start using the engine->pinned_default_state, otherwise
-	 * it is in limbo and an early reset may fail.
-	 */
-	__i915_gem_park(i915);
-
-	return true;
+	return switch_to_kernel_context_sync(i915);
 }
 
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
-	intel_wakeref_t wakeref;
-
 	GEM_TRACE("\n");
 
-	wakeref = intel_runtime_pm_get(i915);
+	flush_workqueue(i915->wq);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
@@ -250,10 +156,16 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
+	switch_to_kernel_context_sync(i915);
 
 	mutex_unlock(&i915->drm.struct_mutex);
-	i915_reset_flush(i915);
+
+	/*
+	 * Assert that we successfully flushed all the work and
+	 * reset the GPU back to its idle, low power state.
+	 */
+	GEM_BUG_ON(i915->gt.awake);
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
 	drain_delayed_work(&i915->gem.retire_work);
 
@@ -263,17 +175,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	drain_delayed_work(&i915->gem.idle_work);
 
-	flush_workqueue(i915->wq);
-
-	/*
-	 * Assert that we successfully flushed all the work and
-	 * reset the GPU back to its idle, low power state.
-	 */
-	GEM_BUG_ON(i915->gt.awake);
+	i915_gem_drain_freed_objects(i915);
 
 	intel_uc_suspend(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
 }
 
 void i915_gem_suspend_late(struct drm_i915_private *i915)
@@ -362,4 +266,8 @@ void i915_gem_init__pm(struct drm_i915_private *i915)
 {
 	INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
 	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
+
+	i915->gem.pm_notifier.notifier_call = pm_notifier;
+	blocking_notifier_chain_register(&i915->gt.pm_notifications,
+					 &i915->gem.pm_notifier);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h
index 52f65e3f06b5..6f7d5d11ac3b 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.h
+++ b/drivers/gpu/drm/i915/i915_gem_pm.h
@@ -17,9 +17,6 @@ void i915_gem_init__pm(struct drm_i915_private *i915);
 bool i915_gem_load_power_context(struct drm_i915_private *i915);
 void i915_gem_resume(struct drm_i915_private *i915);
 
-void i915_gem_unpark(struct drm_i915_private *i915);
-void i915_gem_park(struct drm_i915_private *i915);
-
 void i915_gem_idle_work_handler(struct work_struct *work);
 
 void i915_gem_suspend(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index b419d0f59275..2ecd0c6a1c94 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -179,8 +179,6 @@ struct i915_gpu_state {
 	struct scatterlist *sgl, *fit;
 };
 
-struct i915_gpu_restart;
-
 struct i915_gpu_error {
 	/* For hangcheck timer */
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@@ -241,8 +239,6 @@ struct i915_gpu_error {
 	wait_queue_head_t reset_queue;
 
 	struct srcu_struct reset_backoff_srcu;
-
-	struct i915_gpu_restart *restart;
 };
 
 struct drm_i915_error_state_buf {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 672c9ea6c24f..d116b5e69826 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -431,6 +431,8 @@ void __i915_request_submit(struct i915_request *request)
 	/* Transfer from per-context onto the global per-engine timeline */
 	move_to_timeline(request, &engine->timeline);
 
+	engine->serial++;
+
 	trace_i915_request_execute(request);
 }
 
@@ -1146,7 +1148,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	list_add_tail(&rq->ring_link, &ring->request_list);
 	if (list_is_first(&rq->ring_link, &ring->request_list))
 		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->i915->gt.active_engines |= rq->engine->mask;
 	rq->emitted_jiffies = jiffies;
 
 	/*
@@ -1418,21 +1419,20 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-void i915_retire_requests(struct drm_i915_private *i915)
+bool i915_retire_requests(struct drm_i915_private *i915)
 {
 	struct intel_ring *ring, *tmp;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	if (!i915->gt.active_requests)
-		return;
-
 	list_for_each_entry_safe(ring, tmp,
 				 &i915->gt.active_rings, active_link) {
 		intel_ring_get(ring); /* last rq holds reference! */
 		ring_retire_requests(ring);
 		intel_ring_put(ring);
 	}
+
+	return !list_empty(&i915->gt.active_rings);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 36f13b74ec58..1eee7416af31 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -425,6 +425,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
 }
 
-void i915_retire_requests(struct drm_i915_private *i915);
+bool i915_retire_requests(struct drm_i915_private *i915);
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 13f823ff8083..fd9d3b0d9f47 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -466,26 +466,22 @@ void intel_uc_reset_prepare(struct drm_i915_private *i915)
 	intel_uc_sanitize(i915);
 }
 
-int intel_uc_suspend(struct drm_i915_private *i915)
+void intel_uc_suspend(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
+	intel_wakeref_t wakeref;
 	int err;
 
-	if (!USES_GUC(i915))
-		return 0;
-
 	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
-		return 0;
-
-	err = intel_guc_suspend(guc);
-	if (err) {
-		DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
-		return err;
-	}
+		return;
 
-	guc_disable_communication(guc);
+	with_intel_runtime_pm(i915, wakeref) {
+		err = intel_guc_suspend(guc);
+		if (err)
+			DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
 
-	return 0;
+		guc_disable_communication(guc);
+	}
 }
 
 int intel_uc_resume(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index c14729786652..c92436b1f1c5 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -39,7 +39,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_uc_init(struct drm_i915_private *dev_priv);
 void intel_uc_fini(struct drm_i915_private *dev_priv);
 void intel_uc_reset_prepare(struct drm_i915_private *i915);
-int intel_uc_suspend(struct drm_i915_private *dev_priv);
+void intel_uc_suspend(struct drm_i915_private *i915);
 int intel_uc_resume(struct drm_i915_private *dev_priv);
 
 static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 6fd70d326468..0342de369d3e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -16,26 +16,18 @@ static int switch_to_context(struct drm_i915_private *i915,
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-	int err = 0;
-
-	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
 		rq = i915_request_alloc(engine, ctx);
-		if (IS_ERR(rq)) {
-			err = PTR_ERR(rq);
-			break;
-		}
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
 
 		i915_request_add(rq);
 	}
 
-	intel_runtime_pm_put(i915, wakeref);
-
-	return err;
+	return 0;
 }
 
 static void trash_stolen(struct drm_i915_private *i915)
@@ -120,7 +112,7 @@ static void pm_resume(struct drm_i915_private *i915)
 	 * that runtime-pm just works.
 	 */
 	with_intel_runtime_pm(i915, wakeref) {
-		intel_engines_sanitize(i915, false);
+		intel_gt_sanitize(i915, false);
 		i915_gem_sanitize(i915);
 		i915_gem_resume(i915);
 	}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 9d646fa1b74e..71d896bbade2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1608,113 +1608,6 @@ __engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
 	return "none";
 }
 
-static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
-					  struct i915_gem_context *ctx,
-					  intel_engine_mask_t engines)
-{
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-	int pass;
-
-	GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
-	for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */
-		bool from_idle = pass & 1;
-		int err;
-
-		if (!from_idle) {
-			for_each_engine_masked(engine, i915, engines, tmp) {
-				struct i915_request *rq;
-
-				rq = i915_request_alloc(engine, ctx);
-				if (IS_ERR(rq))
-					return PTR_ERR(rq);
-
-				i915_request_add(rq);
-			}
-		}
-
-		err = i915_gem_switch_to_kernel_context(i915,
-							i915->gt.active_engines);
-		if (err)
-			return err;
-
-		if (!from_idle) {
-			err = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-			if (err)
-				return err;
-		}
-
-		if (i915->gt.active_requests) {
-			pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n",
-			       i915->gt.active_requests,
-			       pass, from_idle ? "idle" : "busy",
-			       __engine_name(i915, engines),
-			       is_power_of_2(engines) ? "" : "s");
-			return -EINVAL;
-		}
-
-		/* XXX Bonus points for proving we are the kernel context! */
-
-		mutex_unlock(&i915->drm.struct_mutex);
-		drain_delayed_work(&i915->gem.idle_work);
-		mutex_lock(&i915->drm.struct_mutex);
-	}
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		return -EIO;
-
-	return 0;
-}
-
-static int igt_switch_to_kernel_context(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-	int err;
-
-	/*
-	 * A core premise of switching to the kernel context is that
-	 * if an engine is already idling in the kernel context, we
-	 * do not emit another request and wake it up. The other being
-	 * that we do indeed end up idling in the kernel context.
-	 */
-
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	ctx = kernel_context(i915);
-	if (IS_ERR(ctx)) {
-		mutex_unlock(&i915->drm.struct_mutex);
-		return PTR_ERR(ctx);
-	}
-
-	/* First check idling each individual engine */
-	for_each_engine(engine, i915, id) {
-		err = __igt_switch_to_kernel_context(i915, ctx, BIT(id));
-		if (err)
-			goto out_unlock;
-	}
-
-	/* Now en masse */
-	err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES);
-	if (err)
-		goto out_unlock;
-
-out_unlock:
-	GEM_TRACE_DUMP_ON(err);
-
-	intel_runtime_pm_put(i915, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	kernel_context_close(ctx);
-	return err;
-}
-
 static void mock_barrier_task(void *data)
 {
 	unsigned int *counter = data;
@@ -1729,7 +1622,6 @@ static int mock_context_barrier(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
-	intel_wakeref_t wakeref;
 	unsigned int counter;
 	int err;
 
@@ -1772,9 +1664,7 @@ static int mock_context_barrier(void *arg)
 		goto out;
 	}
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(i915, wakeref)
-		rq = i915_request_alloc(i915->engine[RCS0], ctx);
+	rq = i915_request_alloc(i915->engine[RCS0], ctx);
 	if (IS_ERR(rq)) {
 		pr_err("Request allocation failed!\n");
 		goto out;
@@ -1824,7 +1714,6 @@ static int mock_context_barrier(void *arg)
 int i915_gem_context_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_switch_to_kernel_context),
 		SUBTEST(mock_context_barrier),
 	};
 	struct drm_i915_private *i915;
@@ -1843,7 +1732,6 @@ int i915_gem_context_mock_selftests(void)
 int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_switch_to_kernel_context),
 		SUBTEST(live_nop_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 12203d665a4e..088b2aa05dcd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -24,6 +24,7 @@
 
 #include "../i915_selftest.h"
 
+#include "igt_flush_test.h"
 #include "mock_gem_device.h"
 #include "huge_gem_object.h"
 
@@ -505,19 +506,23 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_shrinker_unregister(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (!i915->gt.active_requests++) {
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(i915, wakeref)
-			i915_gem_unpark(i915);
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
+	intel_gt_pm_get(i915);
 
 	cancel_delayed_work_sync(&i915->gem.retire_work);
 	cancel_delayed_work_sync(&i915->gem.idle_work);
 }
 
+static void restore_retire_worker(struct drm_i915_private *i915)
+{
+	intel_gt_pm_put(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	i915_gem_shrinker_register(i915);
+}
+
 static int igt_mmap_offset_exhaustion(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -615,13 +620,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 out:
 	drm_mm_remove_node(&resv);
 out_park:
-	mutex_lock(&i915->drm.struct_mutex);
-	if (--i915->gt.active_requests)
-		queue_delayed_work(i915->wq, &i915->gem.retire_work, 0);
-	else
-		queue_delayed_work(i915->wq, &i915->gem.idle_work, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_shrinker_register(i915);
+	restore_retire_worker(i915);
 	return err;
 err_obj:
 	i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 94aee4071a66..e42f3c58536a 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -11,23 +11,29 @@
 
 int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 {
+	int ret = i915_terminally_wedged(i915) ? -EIO : 0;
+	int repeat = !!(flags & I915_WAIT_LOCKED);
+
 	cond_resched();
 
-	if (flags & I915_WAIT_LOCKED &&
-	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
-		pr_err("Failed to switch back to kernel context; declaring wedged\n");
-		i915_gem_set_wedged(i915);
-	}
+	do {
+		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
+			pr_err("%pS timed out, cancelling all further testing.\n",
+			       __builtin_return_address(0));
 
-	if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-		pr_err("%pS timed out, cancelling all further testing.\n",
-		       __builtin_return_address(0));
+			GEM_TRACE("%pS timed out.\n",
+				  __builtin_return_address(0));
+			GEM_TRACE_DUMP();
 
-		GEM_TRACE("%pS timed out.\n", __builtin_return_address(0));
-		GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			repeat = 0;
+			ret = -EIO;
+		}
 
-		i915_gem_set_wedged(i915);
-	}
+		/* Ensure we also flush after wedging. */
+		if (flags & I915_WAIT_LOCKED)
+			i915_retire_requests(i915);
+	} while (repeat--);
 
-	return i915_terminally_wedged(i915);
+	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index fb677b4019a0..c072424c6b7c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -41,11 +41,10 @@ void mock_device_flush(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
-		mock_engine_flush(engine);
-
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
+	do {
+		for_each_engine(engine, i915, id)
+			mock_engine_flush(engine);
+	} while (i915_retire_requests(i915));
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -110,10 +109,6 @@ static void mock_retire_work_handler(struct work_struct *work)
 
 static void mock_idle_work_handler(struct work_struct *work)
 {
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.idle_work.work);
-
-	i915->gt.active_engines = 0;
 }
 
 static int pm_domain_resume(struct device *dev)
@@ -185,6 +180,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_uncore_init(&i915->uncore);
 	i915_gem_init__mm(i915);
+	intel_gt_pm_init(i915);
+	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
-- 
2.20.1


* [PATCH 13/32] drm/i915/gvt: Pin the per-engine GVT shadow contexts
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (10 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 14/32] drm/i915: Explicitly pin the logical context for execbuf Chris Wilson
                   ` (22 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Our eventual goal is to rid request construction of struct_mutex, with
the short-term step of lifting the struct_mutex requirements into the
higher levels (i.e. the caller must ensure that the context is already
pinned into the GTT). In this patch, we pin GVT's shadow contexts upon
allocation, keeping them pinned into the GGTT for as long as the
virtual machine is alive, so that we can use the simpler request
construction path safe in the knowledge that the hard work is already
done.
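
As a rough sketch of the resulting lifetime (condensed from the patch
below, error handling and locking elided; not verbatim code):

	/* setup: pin one shadow context per engine for the vGPU's life */
	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
		ce = intel_context_pin(ctx, engine);
		if (IS_ERR(ce))
			goto out_shadow_ctx;
		s->shadow[i] = ce;
	}

	/* request construction no longer needs to pin on demand */
	rq = i915_request_create(s->shadow[workload->ring_id]);

	/* teardown: unpin only when the virtual machine goes away */
	for_each_engine(engine, vgpu->gvt->dev_priv, id)
		intel_context_unpin(s->shadow[id]);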

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h          |   2 +-
 drivers/gpu/drm/i915/gvt/kvmgt.c        |   2 +-
 drivers/gpu/drm/i915/gvt/mmio_context.c |   3 +-
 drivers/gpu/drm/i915/gvt/scheduler.c    | 137 ++++++++++++------------
 4 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index f5a328b5290a..b54f2bdc13a4 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -149,9 +149,9 @@ struct intel_vgpu_submission_ops {
 struct intel_vgpu_submission {
 	struct intel_vgpu_execlist execlist[I915_NUM_ENGINES];
 	struct list_head workload_q_head[I915_NUM_ENGINES];
+	struct intel_context *shadow[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
-	struct i915_gem_context *shadow_ctx;
 	union {
 		u64 i915_context_pml4;
 		u64 i915_context_pdps[GEN8_3LVL_PDPES];
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index a68addf95c23..144301b778df 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1576,7 +1576,7 @@ hw_id_show(struct device *dev, struct device_attribute *attr,
 		struct intel_vgpu *vgpu = (struct intel_vgpu *)
 			mdev_get_drvdata(mdev);
 		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow_ctx->hw_id);
+			       vgpu->submission.shadow[0]->gem_context->hw_id);
 	}
 	return sprintf(buf, "\n");
 }
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..b8823495022b 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -495,8 +495,7 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
-								     dev_priv->engine[ring_id])))
+			    !is_inhibit_context(s->shadow[ring_id]))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8998fa5ab198..40d9f549a0cd 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -36,6 +36,7 @@
 #include <linux/kthread.h>
 
 #include "i915_drv.h"
+#include "i915_gem_pm.h"
 #include "gvt.h"
 
 #define RING_CTX_OFF(x) \
@@ -277,18 +278,23 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static void shadow_context_descriptor_update(struct intel_context *ce)
+static void
+shadow_context_descriptor_update(struct intel_context *ce,
+				 struct intel_vgpu_workload *workload)
 {
-	u64 desc = 0;
-
-	desc = ce->lrc_desc;
+	u64 desc = ce->lrc_desc;
 
-	/* Update bits 0-11 of the context descriptor which includes flags
+	/*
+	 * Update bits 0-11 of the context descriptor which includes flags
 	 * like GEN8_CTX_* cached in desc_template
 	 */
 	desc &= U64_MAX << 12;
 	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
 
+	desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
+	desc |= workload->ctx_desc.addressing_mode <<
+		GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
 	ce->lrc_desc = desc;
 }
 
@@ -365,26 +371,22 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
 	struct i915_request *rq;
-	int ret = 0;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	if (workload->req)
-		goto out;
+		return 0;
 
-	rq = i915_request_alloc(engine, shadow_ctx);
+	rq = i915_request_create(s->shadow[workload->ring_id]);
 	if (IS_ERR(rq)) {
 		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto out;
+		return PTR_ERR(rq);
 	}
+
 	workload->req = i915_request_get(rq);
-out:
-	return ret;
+	return 0;
 }
 
 /**
@@ -399,10 +401,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
-	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -410,29 +409,13 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	if (workload->shadow)
 		return 0;
 
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ce = intel_context_pin(shadow_ctx, engine);
-	if (IS_ERR(ce)) {
-		gvt_vgpu_err("fail to pin shadow context\n");
-		return PTR_ERR(ce);
-	}
-
-	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
-	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
-				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
 	if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
-		shadow_context_descriptor_update(ce);
+		shadow_context_descriptor_update(s->shadow[workload->ring_id],
+						 workload);
 
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
@@ -444,8 +427,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	return 0;
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
-err_unpin:
-	intel_context_unpin(ce);
 	return ret;
 }
 
@@ -672,7 +653,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct i915_request *rq;
 	int ring_id = workload->ring_id;
 	int ret;
@@ -683,7 +663,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	mutex_lock(&vgpu->vgpu_lock);
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
+	ret = set_context_ppgtt_from_shadow(workload,
+					    s->shadow[ring_id]->gem_context);
 	if (ret < 0) {
 		gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
 		goto err_req;
@@ -994,8 +975,6 @@ static int workload_thread(void *priv)
 				workload->ring_id, workload,
 				workload->vgpu->id);
 
-		intel_runtime_pm_get(gvt->dev_priv);
-
 		gvt_dbg_sched("ring id %d will dispatch workload %p\n",
 				workload->ring_id, workload);
 
@@ -1025,7 +1004,6 @@ static int workload_thread(void *priv)
 			intel_uncore_forcewake_put(&gvt->dev_priv->uncore,
 					FORCEWAKE_ALL);
 
-		intel_runtime_pm_put_unchecked(gvt->dev_priv);
 		if (ret && (vgpu_is_vm_unhealthy(ret)))
 			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 	}
@@ -1108,17 +1086,17 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
 }
 
 static void
-i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
+				struct i915_hw_ppgtt *ppgtt)
 {
-	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;
 
-	if (i915_vm_is_4lvl(&i915_ppgtt->vm)) {
-		px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		px_dma(&ppgtt->pml4) = s->i915_context_pml4;
 	} else {
 		for (i = 0; i < GEN8_3LVL_PDPES; i++)
-			px_dma(i915_ppgtt->pdp.page_directory[i]) =
-						s->i915_context_pdps[i];
+			px_dma(ppgtt->pdp.page_directory[i]) =
+				s->i915_context_pdps[i];
 	}
 }
 
@@ -1132,10 +1110,15 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
-	i915_context_ppgtt_root_restore(s);
-	i915_gem_context_put(s->shadow_ctx);
+
+	i915_context_ppgtt_root_restore(s, s->shadow[0]->gem_context->ppgtt);
+	for_each_engine(engine, vgpu->gvt->dev_priv, id)
+		intel_context_unpin(s->shadow[id]);
+
 	kmem_cache_destroy(s->workloads);
 }
 
@@ -1161,17 +1144,17 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
 }
 
 static void
-i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
+			     struct i915_hw_ppgtt *ppgtt)
 {
-	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;
 
-	if (i915_vm_is_4lvl(&i915_ppgtt->vm))
-		s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
-	else {
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		s->i915_context_pml4 = px_dma(&ppgtt->pml4);
+	} else {
 		for (i = 0; i < GEN8_3LVL_PDPES; i++)
 			s->i915_context_pdps[i] =
-				px_dma(i915_ppgtt->pdp.page_directory[i]);
+				px_dma(ppgtt->pdp.page_directory[i]);
 	}
 }
 
@@ -1188,16 +1171,31 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	enum intel_engine_id i;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id i;
 	int ret;
 
-	s->shadow_ctx = i915_gem_context_create_gvt(
-			&vgpu->gvt->dev_priv->drm);
-	if (IS_ERR(s->shadow_ctx))
-		return PTR_ERR(s->shadow_ctx);
+	ctx = i915_gem_context_create_gvt(&vgpu->gvt->dev_priv->drm);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	i915_context_ppgtt_root_save(s, ctx->ppgtt);
+
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		struct intel_context *ce;
+
+		INIT_LIST_HEAD(&s->workload_q_head[i]);
+		s->shadow[i] = ERR_PTR(-EINVAL);
 
-	i915_context_ppgtt_root_save(s);
+		ce = intel_context_pin(ctx, engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
+			goto out_shadow_ctx;
+		}
+
+		s->shadow[i] = ce;
+	}
 
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
@@ -1213,16 +1211,21 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		goto out_shadow_ctx;
 	}
 
-	for_each_engine(engine, vgpu->gvt->dev_priv, i)
-		INIT_LIST_HEAD(&s->workload_q_head[i]);
-
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
+	i915_gem_context_put(ctx);
 	return 0;
 
 out_shadow_ctx:
-	i915_gem_context_put(s->shadow_ctx);
+	i915_context_ppgtt_root_restore(s, ctx->ppgtt);
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		if (IS_ERR(s->shadow[i]))
+			break;
+
+		intel_context_unpin(s->shadow[i]);
+	}
+	i915_gem_context_put(ctx);
 	return ret;
 }
 
-- 
2.20.1


* [PATCH 14/32] drm/i915: Explicitly pin the logical context for execbuf
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (11 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 13/32] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 15/32] drm/i915: Export intel_context_instance() Chris Wilson
                   ` (21 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

In order to separate the reservation phase of building a request from
its emission phase, we need to pull some of the request-allocation
activities from deep inside i915_request up to the surface, into
GEM_EXECBUFFER.

v2: Be frivolous, use a local drm_i915_private.
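
The net effect on the execbuf path, condensed from the patch below
(error unwinding omitted, so a flow sketch rather than compilable
code):

	intel_gt_pm_get(eb.i915);
	err = i915_mutex_lock_interruptible(dev);

	err = eb_select_engine(&eb, file, args); /* pins eb.context */
	...
	eb.request = i915_request_create(eb.context); /* no implicit pin */
	...
	eb_unpin_context(&eb);
	mutex_unlock(&dev->struct_mutex);
	intel_gt_pm_put(eb.i915);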

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 108 +++++++++++++--------
 drivers/gpu/drm/i915/i915_request.c        |   9 --
 2 files changed, 69 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d672c9edb94..794af8edc6a2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,6 +34,8 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
+#include "gt/intel_gt_pm.h"
+
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_trace.h"
@@ -236,7 +238,8 @@ struct i915_execbuffer {
 	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
-	struct i915_gem_context *ctx; /** context for building the request */
+	struct intel_context *context; /* logical state for the request */
+	struct i915_gem_context *gem_context; /** caller's context */
 	struct i915_address_space *vm; /** GTT and vma for the request */
 
 	struct i915_request *request; /** our request to build */
@@ -738,7 +741,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	if (unlikely(!ctx))
 		return -ENOENT;
 
-	eb->ctx = ctx;
+	eb->gem_context = ctx;
 	if (ctx->ppgtt) {
 		eb->vm = &ctx->ppgtt->vm;
 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -784,7 +787,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
 
 static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 {
-	const struct intel_context *ce;
 	struct i915_request *rq;
 	int ret = 0;
 
@@ -794,11 +796,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 	 * keeping all of their resources pinned.
 	 */
 
-	ce = intel_context_lookup(eb->ctx, eb->engine);
-	if (!ce || !ce->ring) /* first use, assume empty! */
-		return 0;
-
-	rq = __eb_wait_for_ring(ce->ring);
+	rq = __eb_wait_for_ring(eb->context->ring);
 	if (rq) {
 		mutex_unlock(&eb->i915->drm.struct_mutex);
 
@@ -817,15 +815,15 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
+	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
 	struct drm_i915_gem_object *obj;
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
 		return -ENOENT;
 
-	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
+	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
 		return -EIO;
 
 	INIT_LIST_HEAD(&eb->relocs);
@@ -870,8 +868,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		if (!vma->open_count++)
 			i915_vma_reopen(vma);
 		list_add(&lut->obj_link, &obj->lut_list);
-		list_add(&lut->ctx_link, &eb->ctx->handles_list);
-		lut->ctx = eb->ctx;
+		list_add(&lut->ctx_link, &eb->gem_context->handles_list);
+		lut->ctx = eb->gem_context;
 		lut->handle = handle;
 
 add_vma:
@@ -1227,7 +1225,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_unmap;
 
-	rq = i915_request_alloc(eb->engine, eb->ctx);
+	rq = i915_request_create(eb->context);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -2088,31 +2086,65 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+static int eb_pin_context(struct i915_execbuffer *eb,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	int err;
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	err = i915_terminally_wedged(eb->i915);
+	if (err)
+		return err;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = intel_context_pin(eb->gem_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	eb->engine = engine;
+	eb->context = ce;
+	return 0;
+}
+
+static void eb_unpin_context(struct i915_execbuffer *eb)
+{
+	intel_context_unpin(eb->context);
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
+	struct drm_i915_private *i915 = eb->i915;
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if ((user_ring_id != I915_EXEC_BSD) &&
 	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
 			  "bsd dispatch flags: %d\n", (int)(args->flags));
-		return NULL;
+		return -EINVAL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2120,20 +2152,20 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 		} else {
 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
 				  bsd_idx);
-			return NULL;
+			return -EINVAL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
 		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
-		return NULL;
+		return -EINVAL;
 	}
 
-	return engine;
+	return eb_pin_context(eb, engine);
 }
 
 static void
@@ -2275,7 +2307,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct sync_file *out_fence = NULL;
-	intel_wakeref_t wakeref;
 	int out_fence_fd = -1;
 	int err;
 
@@ -2335,12 +2366,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
-	if (!eb.engine) {
-		err = -EINVAL;
-		goto err_engine;
-	}
-
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
@@ -2348,16 +2373,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * wakeref that we hold until the GPU has been idle for at least
 	 * 100ms.
 	 */
-	wakeref = intel_runtime_pm_get(eb.i915);
+	intel_gt_pm_get(eb.i915);
 
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto err_rpm;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	err = eb_select_engine(&eb, file, args);
 	if (unlikely(err))
 		goto err_unlock;
 
+	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	if (unlikely(err))
+		goto err_engine;
+
 	err = eb_relocate(&eb);
 	if (err) {
 		/*
@@ -2441,7 +2470,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	GEM_BUG_ON(eb.reloc_cache.rq);
 
 	/* Allocate a request for this batch buffer nice and early. */
-	eb.request = i915_request_alloc(eb.engine, eb.ctx);
+	eb.request = i915_request_create(eb.context);
 	if (IS_ERR(eb.request)) {
 		err = PTR_ERR(eb.request);
 		goto err_batch_unpin;
@@ -2479,8 +2508,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb);
 err_request:
-	i915_request_add(eb.request);
 	add_to_client(eb.request, file);
+	i915_request_add(eb.request);
 
 	if (fences)
 		signal_fence_array(&eb, fences);
@@ -2502,12 +2531,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
+err_engine:
+	eb_unpin_context(&eb);
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put(eb.i915, wakeref);
-err_engine:
-	i915_gem_context_put(eb.ctx);
+	intel_gt_pm_put(eb.i915);
+	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
 err_out_fence:
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d116b5e69826..818d6dc6b8c8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -786,7 +786,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_context *ce;
 	struct i915_request *rq;
-	int ret;
 
 	/*
 	 * Preempt contexts are reserved for exclusive use to inject a
@@ -795,14 +794,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	GEM_BUG_ON(ctx == i915->preempt_context);
 
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
-
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
-- 
2.20.1


* [PATCH 15/32] drm/i915: Export intel_context_instance()
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (12 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 14/32] drm/i915: Explicitly pin the logical context for execbuf Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 16/32] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
                   ` (20 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

We want to pass an intel_context into intel_context_pin(), and that
requires us first to be able to look up the intel_context!
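
With that, the expected calling convention becomes (a sketch of the
pattern repeated in the conversions below):

	ce = intel_context_instance(ctx, engine); /* lookup + reference */
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce); /* pin now takes the intel_context */
	intel_context_put(ce);
	if (err)
		return err;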

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c    | 37 +++++++++++-----------
 drivers/gpu/drm/i915/gt/intel_context.h    | 19 +++++++----
 drivers/gpu/drm/i915/gt/intel_engine_cs.c  |  8 ++++-
 drivers/gpu/drm/i915/gt/mock_engine.c      |  8 ++++-
 drivers/gpu/drm/i915/gvt/scheduler.c       |  7 +++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++--
 drivers/gpu/drm/i915/i915_perf.c           | 21 ++++++++----
 drivers/gpu/drm/i915/i915_request.c        | 11 ++++++-
 8 files changed, 83 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 298e463ad082..8b386202b374 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -104,7 +104,7 @@ void __intel_context_remove(struct intel_context *ce)
 	spin_unlock(&ctx->hw_contexts_lock);
 }
 
-static struct intel_context *
+struct intel_context *
 intel_context_instance(struct i915_gem_context *ctx,
 		       struct intel_engine_cs *engine)
 {
@@ -112,7 +112,7 @@ intel_context_instance(struct i915_gem_context *ctx,
 
 	ce = intel_context_lookup(ctx, engine);
 	if (likely(ce))
-		return ce;
+		return intel_context_get(ce);
 
 	ce = intel_context_alloc();
 	if (!ce)
@@ -125,7 +125,7 @@ intel_context_instance(struct i915_gem_context *ctx,
 		intel_context_free(ce);
 
 	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
-	return pos;
+	return intel_context_get(pos);
 }
 
 struct intel_context *
@@ -139,30 +139,30 @@ intel_context_pin_lock(struct i915_gem_context *ctx,
 	if (IS_ERR(ce))
 		return ce;
 
-	if (mutex_lock_interruptible(&ce->pin_mutex))
+	if (mutex_lock_interruptible(&ce->pin_mutex)) {
+		intel_context_put(ce);
 		return ERR_PTR(-EINTR);
+	}
 
 	return ce;
 }
 
-struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx,
-		  struct intel_engine_cs *engine)
+void intel_context_pin_unlock(struct intel_context *ce)
+	__releases(ce->pin_mutex)
 {
-	struct intel_context *ce;
-	int err;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
+	mutex_unlock(&ce->pin_mutex);
+	intel_context_put(ce);
+}
 
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
-		return ce;
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	int err;
 
 	if (mutex_lock_interruptible(&ce->pin_mutex))
-		return ERR_PTR(-EINTR);
+		return -EINTR;
 
 	if (likely(!atomic_read(&ce->pin_count))) {
+		struct i915_gem_context *ctx = ce->gem_context;
 		intel_wakeref_t wakeref;
 
 		err = 0;
@@ -172,7 +172,6 @@ intel_context_pin(struct i915_gem_context *ctx,
 			goto err;
 
 		i915_gem_context_get(ctx);
-		GEM_BUG_ON(ce->gem_context != ctx);
 
 		mutex_lock(&ctx->mutex);
 		list_add(&ce->active_link, &ctx->active_engines);
@@ -186,11 +185,11 @@ intel_context_pin(struct i915_gem_context *ctx,
 	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
 
 	mutex_unlock(&ce->pin_mutex);
-	return ce;
+	return 0;
 
 err:
 	mutex_unlock(&ce->pin_mutex);
-	return ERR_PTR(err);
+	return err;
 }
 
 void intel_context_unpin(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 60379eb37949..b9a574587eb3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -49,11 +49,7 @@ intel_context_is_pinned(struct intel_context *ce)
 	return atomic_read(&ce->pin_count);
 }
 
-static inline void intel_context_pin_unlock(struct intel_context *ce)
-__releases(ce->pin_mutex)
-{
-	mutex_unlock(&ce->pin_mutex);
-}
+void intel_context_pin_unlock(struct intel_context *ce);
 
 struct intel_context *
 __intel_context_insert(struct i915_gem_context *ctx,
@@ -63,7 +59,18 @@ void
 __intel_context_remove(struct intel_context *ce);
 
 struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+intel_context_instance(struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine);
+
+int __intel_context_do_pin(struct intel_context *ce);
+
+static inline int intel_context_pin(struct intel_context *ce)
+{
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	return __intel_context_do_pin(ce);
+}
 
 static inline void __intel_context_pin(struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 268dfb8e16ff..1dbec7a9a554 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -697,11 +697,17 @@ static int pin_context(struct i915_gem_context *ctx,
 		       struct intel_context **out)
 {
 	struct intel_context *ce;
+	int err;
 
-	ce = intel_context_pin(ctx, engine);
+	ce = intel_context_instance(ctx, engine);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		return err;
+
 	*out = ce;
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index a97a0ab35703..21f413ff5b8e 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -239,6 +239,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    int id)
 {
 	struct mock_engine *engine;
+	int err;
 
 	GEM_BUG_ON(id >= I915_NUM_ENGINES);
 
@@ -278,10 +279,15 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&engine->hw_queue);
 
 	engine->base.kernel_context =
-		intel_context_pin(i915->kernel_context, &engine->base);
+		intel_context_instance(i915->kernel_context, &engine->base);
 	if (IS_ERR(engine->base.kernel_context))
 		goto err_breadcrumbs;
 
+	err = intel_context_pin(engine->base.kernel_context);
+	intel_context_put(engine->base.kernel_context);
+	if (err)
+		goto err_breadcrumbs;
+
 	return &engine->base;
 
 err_breadcrumbs:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 40d9f549a0cd..606fc2713240 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1188,12 +1188,17 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&s->workload_q_head[i]);
 		s->shadow[i] = ERR_PTR(-EINVAL);
 
-		ce = intel_context_pin(ctx, engine);
+		ce = intel_context_instance(ctx, engine);
 		if (IS_ERR(ce)) {
 			ret = PTR_ERR(ce);
 			goto out_shadow_ctx;
 		}
 
+		ret = intel_context_pin(ce);
+		intel_context_put(ce);
+		if (ret)
+			goto out_shadow_ctx;
+
 		s->shadow[i] = ce;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 794af8edc6a2..166a33c0d3ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2100,14 +2100,19 @@ static int eb_pin_context(struct i915_execbuffer *eb,
 	if (err)
 		return err;
 
+	ce = intel_context_instance(eb->gem_context, engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ce = intel_context_pin(eb->gem_context, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		return err;
 
 	eb->engine = engine;
 	eb->context = ce;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 328a740e72cb..afaeabe5e531 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1205,11 +1205,17 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 {
 	struct intel_engine_cs *engine = i915->engine[RCS0];
 	struct intel_context *ce;
-	int ret;
+	int err;
 
-	ret = i915_mutex_lock_interruptible(&i915->drm);
-	if (ret)
-		return ERR_PTR(ret);
+	ce = intel_context_instance(ctx, engine);
+	if (IS_ERR(ce))
+		return ce;
+
+	err = i915_mutex_lock_interruptible(&i915->drm);
+	if (err) {
+		intel_context_put(ce);
+		return ERR_PTR(err);
+	}
 
 	/*
 	 * As the ID is the gtt offset of the context's vma we
@@ -1217,10 +1223,11 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 	 *
 	 * NB: implied RCS engine...
 	 */
-	ce = intel_context_pin(ctx, engine);
+	err = intel_context_pin(ce);
 	mutex_unlock(&i915->drm.struct_mutex);
-	if (IS_ERR(ce))
-		return ce;
+	intel_context_put(ce);
+	if (err)
+		return ERR_PTR(err);
 
 	i915->perf.oa.pinned_ctx = ce;
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 818d6dc6b8c8..975491f763df 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -786,6 +786,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_context *ce;
 	struct i915_request *rq;
+	int err;
 
 	/*
 	 * Preempt contexts are reserved for exclusive use to inject a
@@ -799,13 +800,21 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ce = intel_context_pin(ctx, engine);
+	ce = intel_context_instance(ctx, engine);
 	if (IS_ERR(ce))
 		return ERR_CAST(ce);
 
+	err = intel_context_pin(ce);
+	if (err) {
+		rq = ERR_PTR(err);
+		goto err_put;
+	}
+
 	rq = i915_request_create(ce);
 	intel_context_unpin(ce);
 
+err_put:
+	intel_context_put(ce);
 	return rq;
 }
 
-- 
2.20.1


* [PATCH 16/32] drm/i915/selftests: Use the real kernel context for sseu isolation tests
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (13 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 15/32] drm/i915: Export intel_context_instance() Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 17/32] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
                   ` (19 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Simplify the setup slightly for the sseu selftests to use the actual
kernel_context.
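
That is, instead of creating and closing a private kernel context
around each test, the readback now uses the device's own, e.g.:

	ret = __read_slice_count(i915, i915->kernel_context, engine, obj,
				 NULL, &rpcs);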

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/selftests/i915_gem_context.c   | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 71d896bbade2..807644ae6877 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -957,7 +957,6 @@ __sseu_finish(struct drm_i915_private *i915,
 	      const char *name,
 	      unsigned int flags,
 	      struct i915_gem_context *ctx,
-	      struct i915_gem_context *kctx,
 	      struct intel_engine_cs *engine,
 	      struct drm_i915_gem_object *obj,
 	      unsigned int expected,
@@ -979,7 +978,8 @@ __sseu_finish(struct drm_i915_private *i915,
 	if (ret)
 		goto out;
 
-	ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs);
+	ret = __read_slice_count(i915, i915->kernel_context, engine, obj,
+				 NULL, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
 
 out:
@@ -1011,22 +1011,17 @@ __sseu_test(struct drm_i915_private *i915,
 	    struct intel_sseu sseu)
 {
 	struct igt_spinner *spin = NULL;
-	struct i915_gem_context *kctx;
 	int ret;
 
-	kctx = kernel_context(i915);
-	if (IS_ERR(kctx))
-		return PTR_ERR(kctx);
-
 	ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
 	if (ret)
-		goto out_context;
+		return ret;
 
 	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
 	if (ret)
 		goto out_spin;
 
-	ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj,
+	ret = __sseu_finish(i915, name, flags, ctx, engine, obj,
 			    hweight32(sseu.slice_mask), spin);
 
 out_spin:
@@ -1035,10 +1030,6 @@ __sseu_test(struct drm_i915_private *i915,
 		igt_spinner_fini(spin);
 		kfree(spin);
 	}
-
-out_context:
-	kernel_context_close(kctx);
-
 	return ret;
 }
 
-- 
2.20.1


* [PATCH 17/32] drm/i915/selftests: Pass around intel_context for sseu
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (14 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 16/32] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 18/32] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
                   ` (18 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Combine the (i915_gem_context, intel_engine) pair into a single
parameter, the intel_context, for convenience and later simplification.
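
For illustration, a sketch of the calling convention before and after
(names taken from the hunks below; ce->gem_context and ce->engine
recover the old pair):

	/* before: helpers take the (ctx, engine) pair explicitly */
	ret = emit_rpcs_query(obj, ctx, engine, &rq);

	/* after: the intel_context carries both */
	ret = emit_rpcs_query(obj, ce, &rq);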

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/selftests/i915_gem_context.c | 74 +++++++++++--------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 807644ae6877..8e2a94333559 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -755,8 +755,7 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
 
 static int
 emit_rpcs_query(struct drm_i915_gem_object *obj,
-		struct i915_gem_context *ctx,
-		struct intel_engine_cs *engine,
+		struct intel_context *ce,
 		struct i915_request **rq_out)
 {
 	struct i915_request *rq;
@@ -764,9 +763,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int err;
 
-	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
 
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, &ce->gem_context->ppgtt->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -784,13 +783,15 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 		goto err_vma;
 	}
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = i915_request_create(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
 	}
 
-	err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
+	err = rq->engine->emit_bb_start(rq,
+					batch->node.start, batch->node.size,
+					0);
 	if (err)
 		goto err_request;
 
@@ -834,8 +835,7 @@ static int
 __sseu_prepare(struct drm_i915_private *i915,
 	       const char *name,
 	       unsigned int flags,
-	       struct i915_gem_context *ctx,
-	       struct intel_engine_cs *engine,
+	       struct intel_context *ce,
 	       struct igt_spinner **spin)
 {
 	struct i915_request *rq;
@@ -853,7 +853,10 @@ __sseu_prepare(struct drm_i915_private *i915,
 	if (ret)
 		goto err_free;
 
-	rq = igt_spinner_create_request(*spin, ctx, engine, MI_NOOP);
+	rq = igt_spinner_create_request(*spin,
+					ce->gem_context,
+					ce->engine,
+					MI_NOOP);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto err_fini;
@@ -880,8 +883,7 @@ __sseu_prepare(struct drm_i915_private *i915,
 
 static int
 __read_slice_count(struct drm_i915_private *i915,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine,
+		   struct intel_context *ce,
 		   struct drm_i915_gem_object *obj,
 		   struct igt_spinner *spin,
 		   u32 *rpcs)
@@ -892,7 +894,7 @@ __read_slice_count(struct drm_i915_private *i915,
 	u32 *buf, val;
 	long ret;
 
-	ret = emit_rpcs_query(obj, ctx, engine, &rq);
+	ret = emit_rpcs_query(obj, ce, &rq);
 	if (ret)
 		return ret;
 
@@ -956,29 +958,28 @@ static int
 __sseu_finish(struct drm_i915_private *i915,
 	      const char *name,
 	      unsigned int flags,
-	      struct i915_gem_context *ctx,
-	      struct intel_engine_cs *engine,
+	      struct intel_context *ce,
 	      struct drm_i915_gem_object *obj,
 	      unsigned int expected,
 	      struct igt_spinner *spin)
 {
-	unsigned int slices = hweight32(engine->sseu.slice_mask);
+	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
 	u32 rpcs = 0;
 	int ret = 0;
 
 	if (flags & TEST_RESET) {
-		ret = i915_reset_engine(engine, "sseu");
+		ret = i915_reset_engine(ce->engine, "sseu");
 		if (ret)
 			goto out;
 	}
 
-	ret = __read_slice_count(i915, ctx, engine, obj,
+	ret = __read_slice_count(i915, ce, obj,
 				 flags & TEST_RESET ? NULL : spin, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
 	if (ret)
 		goto out;
 
-	ret = __read_slice_count(i915, i915->kernel_context, engine, obj,
+	ret = __read_slice_count(i915, ce->engine->kernel_context, obj,
 				 NULL, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
 
@@ -993,7 +994,7 @@ __sseu_finish(struct drm_i915_private *i915,
 		if (ret)
 			return ret;
 
-		ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs);
+		ret = __read_slice_count(i915, ce, obj, NULL, &rpcs);
 		ret = __check_rpcs(name, rpcs, ret, expected,
 				   "Context", " after idle!");
 	}
@@ -1005,23 +1006,22 @@ static int
 __sseu_test(struct drm_i915_private *i915,
 	    const char *name,
 	    unsigned int flags,
-	    struct i915_gem_context *ctx,
-	    struct intel_engine_cs *engine,
+	    struct intel_context *ce,
 	    struct drm_i915_gem_object *obj,
 	    struct intel_sseu sseu)
 {
 	struct igt_spinner *spin = NULL;
 	int ret;
 
-	ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
+	ret = __sseu_prepare(i915, name, flags, ce, &spin);
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = __i915_gem_context_reconfigure_sseu(ce->gem_context, ce->engine, sseu);
 	if (ret)
 		goto out_spin;
 
-	ret = __sseu_finish(i915, name, flags, ctx, engine, obj,
+	ret = __sseu_finish(i915, name, flags, ce, obj,
 			    hweight32(sseu.slice_mask), spin);
 
 out_spin:
@@ -1042,6 +1042,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	struct intel_sseu default_sseu = engine->sseu;
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_context *ctx;
+	struct intel_context *ce;
 	struct intel_sseu pg_sseu;
 	intel_wakeref_t wakeref;
 	struct drm_file *file;
@@ -1093,23 +1094,33 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(i915);
 
+	ce = intel_context_instance(ctx, i915->engine[RCS0]);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		goto out_rpm;
+	}
+
+	ret = intel_context_pin(ce);
+	if (ret)
+		goto out_context;
+
 	/* First set the default mask. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* Then set a power-gated configuration. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* Back to defaults. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* One last power-gated configuration for the road. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
 	if (ret)
 		goto out_fail;
 
@@ -1117,9 +1128,12 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		ret = -EIO;
 
-	i915_gem_object_put(obj);
-
+	intel_context_unpin(ce);
+out_context:
+	intel_context_put(ce);
+out_rpm:
 	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_object_put(obj);
 
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-- 
2.20.1


* [PATCH 18/32] drm/i915: Pass intel_context to intel_context_pin_lock()
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (15 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 17/32] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 19/32] drm/i915: Split engine setup/init into two phases Chris Wilson
                   ` (17 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Move the intel_context_instance() lookup to the caller so that we can
decouple ourselves from one context instance per engine.

v2: Rename pin_lock() to lock_pinned(), hopefully that is clearer.
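
For illustration, the resulting pattern in a caller such as get_sseu()
(taken from the hunks below), where the lookup and the pin-lock are now
two separate steps:

	ce = lookup_user_engine(ctx, class, instance);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
	if (err) {
		intel_context_put(ce);
		return err;
	}

	/* ce can be neither pinned nor unpinned while the lock is held */

	intel_context_unlock_pinned(ce);
	intel_context_put(ce);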

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 26 ------
 drivers/gpu/drm/i915/gt/intel_context.h       | 34 +++++--
 drivers/gpu/drm/i915/i915_gem_context.c       | 92 +++++++++++--------
 .../gpu/drm/i915/selftests/i915_gem_context.c |  2 +-
 4 files changed, 82 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 8b386202b374..15ac99c5dd4a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -128,32 +128,6 @@ intel_context_instance(struct i915_gem_context *ctx,
 	return intel_context_get(pos);
 }
 
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine)
-	__acquires(ce->pin_mutex)
-{
-	struct intel_context *ce;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex)) {
-		intel_context_put(ce);
-		return ERR_PTR(-EINTR);
-	}
-
-	return ce;
-}
-
-void intel_context_pin_unlock(struct intel_context *ce)
-	__releases(ce->pin_mutex)
-{
-	mutex_unlock(&ce->pin_mutex);
-	intel_context_put(ce);
-}
-
 int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index b9a574587eb3..b746add6b71d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -31,25 +31,45 @@ intel_context_lookup(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine);
 
 /**
- * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
- * @ctx - the parent GEM context
- * @engine - the target HW engine
+ * intel_context_lock_pinned - Stabilises the 'pinned' status of the HW context
+ * @ce - the context
  *
  * Acquire a lock on the pinned status of the HW context, such that the context
  * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
  * intel_context_is_pinned() remains stable.
  */
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine);
+static inline int intel_context_lock_pinned(struct intel_context *ce)
+	__acquires(ce->pin_mutex)
+{
+	return mutex_lock_interruptible(&ce->pin_mutex);
+}
 
+/**
+ * intel_context_is_pinned - Reports the 'pinned' status
+ * @ce - the context
+ *
+ * While in use by the GPU, the context, along with its ring and page
+ * tables, is pinned into memory and the GTT.
+ *
+ * Returns: true if the context is currently pinned for use by the GPU.
+ */
 static inline bool
 intel_context_is_pinned(struct intel_context *ce)
 {
 	return atomic_read(&ce->pin_count);
 }
 
-void intel_context_pin_unlock(struct intel_context *ce);
+/**
+ * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
+ * @ce - the context
+ *
+ * Releases the lock earlier acquired by intel_context_lock_pinned().
+ */
+static inline void intel_context_unlock_pinned(struct intel_context *ce)
+	__releases(ce->pin_mutex)
+{
+	mutex_unlock(&ce->pin_mutex);
+}
 
 struct intel_context *
 __intel_context_insert(struct i915_gem_context *ctx,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 76ed74e75d82..1e1770047cc2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -141,6 +141,18 @@ static void lut_close(struct i915_gem_context *ctx)
 	rcu_read_unlock();
 }
 
+static struct intel_context *
+lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
+{
+	struct intel_engine_cs *engine;
+
+	engine = intel_engine_lookup_user(ctx->i915, class, instance);
+	if (!engine)
+		return ERR_PTR(-EINVAL);
+
+	return intel_context_instance(ctx, engine);
+}
+
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
 {
 	unsigned int max;
@@ -1142,19 +1154,17 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 }
 
 static int
-__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				    struct intel_engine_cs *engine,
-				    struct intel_sseu sseu)
+__intel_context_reconfigure_sseu(struct intel_context *ce,
+				 struct intel_sseu sseu)
 {
-	struct intel_context *ce;
-	int ret = 0;
+	int ret;
 
-	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
-	GEM_BUG_ON(engine->id != RCS0);
+	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
+	GEM_BUG_ON(ce->engine->id != RCS0);
 
-	ce = intel_context_pin_lock(ctx, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	ret = intel_context_lock_pinned(ce);
+	if (ret)
+		return ret;
 
 	/* Nothing to do if unmodified. */
 	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
@@ -1165,24 +1175,23 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 		ce->sseu = sseu;
 
 unlock:
-	intel_context_pin_unlock(ce);
+	intel_context_unlock_pinned(ce);
 	return ret;
 }
 
 static int
-i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine,
-				  struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
+	struct drm_i915_private *i915 = ce->gem_context->i915;
 	int ret;
 
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
 
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
 }
@@ -1290,7 +1299,7 @@ static int set_sseu(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
+	struct intel_context *ce;
 	struct intel_sseu sseu;
 	int ret;
 
@@ -1307,27 +1316,31 @@ static int set_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(i915,
-					  user_sseu.engine.engine_class,
-					  user_sseu.engine.engine_instance);
-	if (!engine)
-		return -EINVAL;
+	ce = lookup_user_engine(ctx,
+				user_sseu.engine.engine_class,
+				user_sseu.engine.engine_instance);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
 
 	/* Only render engine supports RPCS configuration. */
-	if (engine->class != RENDER_CLASS)
-		return -ENODEV;
+	if (ce->engine->class != RENDER_CLASS) {
+		ret = -ENODEV;
+		goto out_ce;
+	}
 
 	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
 	if (ret)
-		return ret;
+		goto out_ce;
 
-	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
-		return ret;
+		goto out_ce;
 
 	args->size = sizeof(user_sseu);
 
-	return 0;
+out_ce:
+	intel_context_put(ce);
+	return ret;
 }
 
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
@@ -1532,8 +1545,8 @@ static int get_sseu(struct i915_gem_context *ctx,
 		    struct drm_i915_gem_context_param *args)
 {
 	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
 	struct intel_context *ce;
+	int err;
 
 	if (args->size == 0)
 		goto out;
@@ -1547,22 +1560,25 @@ static int get_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine.engine_class,
-					  user_sseu.engine.engine_instance);
-	if (!engine)
-		return -EINVAL;
-
-	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
+	ce = lookup_user_engine(ctx,
+				user_sseu.engine.engine_class,
+				user_sseu.engine.engine_instance);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
 	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
 
-	intel_context_pin_unlock(ce);
+	intel_context_unlock_pinned(ce);
+	intel_context_put(ce);
 
 	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
 			 sizeof(user_sseu)))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 8e2a94333559..214d1fd2f4dc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1017,7 +1017,7 @@ __sseu_test(struct drm_i915_private *i915,
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ce->gem_context, ce->engine, sseu);
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
-- 
2.20.1


* [PATCH 19/32] drm/i915: Split engine setup/init into two phases
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (16 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 18/32] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 20/32] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
                   ` (16 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we require the engine vfuncs to be set up prior to
initialising the pinned kernel contexts, so split the vfunc setup from
the engine initialisation and call it earlier.

v2: s/setup_xcs/setup_common/ for intel_ring_submission_setup()
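
A sketch of the resulting bring-up order in i915_gem_init() (the
intel_engines_setup() call is added by the last hunk; the assumption
here is that intel_engines_init() keeps its later call site):

	ret = intel_engines_setup(i915);     /* vfuncs only, no hw access */
	...
	ret = i915_gem_contexts_init(i915);  /* can now pin the kernel contexts */
	...
	ret = intel_engines_init(i915);      /* hw/submission-dependent init */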

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |   8 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  99 ++++----
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  74 ++----
 drivers/gpu/drm/i915/gt/intel_lrc.h           |   5 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 232 +++++++++---------
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |   3 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |  48 ++--
 drivers/gpu/drm/i915/gt/mock_engine.h         |   2 +
 drivers/gpu/drm/i915/i915_gem.c               |   6 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  12 +-
 10 files changed, 245 insertions(+), 244 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index a228dc1774d8..3e53f53bc52b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -362,14 +362,12 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
-int intel_engine_setup_common(struct intel_engine_cs *engine);
+int intel_engines_setup(struct drm_i915_private *i915);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
+int intel_ring_submission_setup(struct intel_engine_cs *engine);
+int intel_ring_submission_init(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1dbec7a9a554..6cb90137b2fa 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -50,35 +50,24 @@
 
 struct engine_class_info {
 	const char *name;
-	int (*init_legacy)(struct intel_engine_cs *engine);
-	int (*init_execlists)(struct intel_engine_cs *engine);
-
 	u8 uabi_class;
 };
 
 static const struct engine_class_info intel_engine_classes[] = {
 	[RENDER_CLASS] = {
 		.name = "rcs",
-		.init_execlists = logical_render_ring_init,
-		.init_legacy = intel_init_render_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_RENDER,
 	},
 	[COPY_ENGINE_CLASS] = {
 		.name = "bcs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_blt_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_COPY,
 	},
 	[VIDEO_DECODE_CLASS] = {
 		.name = "vcs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_bsd_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_VIDEO,
 	},
 	[VIDEO_ENHANCEMENT_CLASS] = {
 		.name = "vecs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_vebox_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
 	},
 };
@@ -400,48 +389,39 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 
 /**
  * intel_engines_init() - init the Engine Command Streamers
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init(struct drm_i915_private *dev_priv)
+int intel_engines_init(struct drm_i915_private *i915)
 {
+	int (*init)(struct intel_engine_cs *engine);
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id, err_id;
 	int err;
 
-	for_each_engine(engine, dev_priv, id) {
-		const struct engine_class_info *class_info =
-			&intel_engine_classes[engine->class];
-		int (*init)(struct intel_engine_cs *engine);
-
-		if (HAS_EXECLISTS(dev_priv))
-			init = class_info->init_execlists;
-		else
-			init = class_info->init_legacy;
+	if (HAS_EXECLISTS(i915))
+		init = intel_execlists_submission_init;
+	else
+		init = intel_ring_submission_init;
 
-		err = -EINVAL;
+	for_each_engine(engine, i915, id) {
 		err_id = id;
 
-		if (GEM_DEBUG_WARN_ON(!init))
-			goto cleanup;
-
 		err = init(engine);
 		if (err)
 			goto cleanup;
-
-		GEM_BUG_ON(!engine->submit_request);
 	}
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, dev_priv, id) {
+	for_each_engine(engine, i915, id) {
 		if (id >= err_id) {
 			kfree(engine);
-			dev_priv->engine[id] = NULL;
+			i915->engine[id] = NULL;
 		} else {
-			dev_priv->gt.cleanup_engine(engine);
+			i915->gt.cleanup_engine(engine);
 		}
 	}
 	return err;
@@ -559,16 +539,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 	return ret;
 }
 
-/**
- * intel_engines_setup_common - setup engine state not requiring hw access
- * @engine: Engine to setup.
- *
- * Initializes @engine@ structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engine_setup_common(struct intel_engine_cs *engine)
+static int intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
@@ -602,6 +573,52 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	return err;
 }
 
+/**
+ * intel_engines_setup - setup engine state not requiring hw access
+ * @i915: Device to setup.
+ *
+ * Initializes engine structure members shared between legacy and execlists
+ * submission modes which do not require hardware access.
+ *
+ * Typically done early in the submission mode specific engine setup stage.
+ */
+int intel_engines_setup(struct drm_i915_private *i915)
+{
+	int (*setup)(struct intel_engine_cs *engine);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	if (HAS_EXECLISTS(i915))
+		setup = intel_execlists_submission_setup;
+	else
+		setup = intel_ring_submission_setup;
+
+	for_each_engine(engine, i915, id) {
+		err = intel_engine_setup_common(engine);
+		if (err)
+			goto cleanup;
+
+		err = setup(engine);
+		if (err)
+			goto cleanup;
+
+		GEM_BUG_ON(!engine->cops);
+	}
+
+	return 0;
+
+cleanup:
+	for_each_engine(engine, i915, id) {
+		if (engine->cops)
+			i915->gt.cleanup_engine(engine);
+		else
+			kfree(engine);
+		i915->engine[id] = NULL;
+	}
+	return err;
+}
+
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 {
 	static const struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d17c08e26935..01f58a152a9e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1697,8 +1697,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	unsigned int i;
 	int ret;
 
-	if (GEM_DEBUG_WARN_ON(engine->id != RCS0))
-		return -EINVAL;
+	if (engine->class != RENDER_CLASS)
+		return 0;
 
 	switch (INTEL_GEN(engine->i915)) {
 	case 11:
@@ -2444,15 +2444,8 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
 
-static int
-logical_ring_setup(struct intel_engine_cs *engine)
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 {
-	int err;
-
-	err = intel_engine_setup_common(engine);
-	if (err)
-		return err;
-
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
@@ -2462,10 +2455,16 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 
+	if (engine->class == RENDER_CLASS) {
+		engine->init_context = gen8_init_rcs_context;
+		engine->emit_flush = gen8_emit_flush_render;
+		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+	}
+
 	return 0;
 }
 
-static int logical_ring_init(struct intel_engine_cs *engine)
+int intel_execlists_submission_init(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2477,6 +2476,15 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 		return ret;
 
 	intel_engine_init_workarounds(engine);
+	intel_engine_init_whitelist(engine);
+
+	if (intel_init_workaround_bb(engine))
+		/*
+		 * We continue even if we fail to initialize WA batch
+		 * because we only expect rare glitches but nothing
+		 * critical to prevent us from using GPU
+		 */
+		DRM_ERROR("WA batch buffer initialization failed\n");
 
 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
 		execlists->submit_reg = i915->uncore.regs +
@@ -2509,50 +2517,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	return 0;
 }
 
-int logical_render_ring_init(struct intel_engine_cs *engine)
-{
-	int ret;
-
-	ret = logical_ring_setup(engine);
-	if (ret)
-		return ret;
-
-	/* Override some for render ring. */
-	engine->init_context = gen8_init_rcs_context;
-	engine->emit_flush = gen8_emit_flush_render;
-	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
-
-	ret = logical_ring_init(engine);
-	if (ret)
-		return ret;
-
-	ret = intel_init_workaround_bb(engine);
-	if (ret) {
-		/*
-		 * We continue even if we fail to initialize WA batch
-		 * because we only expect rare glitches but nothing
-		 * critical to prevent us from using GPU
-		 */
-		DRM_ERROR("WA batch buffer initialization failed: %d\n",
-			  ret);
-	}
-
-	intel_engine_init_whitelist(engine);
-
-	return 0;
-}
-
-int logical_xcs_ring_init(struct intel_engine_cs *engine)
-{
-	int err;
-
-	err = logical_ring_setup(engine);
-	if (err)
-		return err;
-
-	return logical_ring_init(engine);
-}
-
 static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 {
 	u32 indirect_ctx_offset;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 1a33ec74af8c..a0dc907a7249 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -66,8 +66,9 @@ enum {
 
 /* Logical Rings */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
-int logical_render_ring_init(struct intel_engine_cs *engine);
-int logical_xcs_ring_init(struct intel_engine_cs *engine);
+
+int intel_execlists_submission_setup(struct intel_engine_cs *engine);
+int intel_execlists_submission_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index f164dbe90050..09579bcea473 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1523,54 +1523,6 @@ static const struct intel_context_ops ring_context_ops = {
 	.destroy = ring_context_destroy,
 };
 
-static int intel_init_ring_buffer(struct intel_engine_cs *engine)
-{
-	struct i915_timeline *timeline;
-	struct intel_ring *ring;
-	int err;
-
-	err = intel_engine_setup_common(engine);
-	if (err)
-		return err;
-
-	timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
-	if (IS_ERR(timeline)) {
-		err = PTR_ERR(timeline);
-		goto err;
-	}
-	GEM_BUG_ON(timeline->has_initial_breadcrumb);
-
-	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
-	i915_timeline_put(timeline);
-	if (IS_ERR(ring)) {
-		err = PTR_ERR(ring);
-		goto err;
-	}
-
-	err = intel_ring_pin(ring);
-	if (err)
-		goto err_ring;
-
-	GEM_BUG_ON(engine->buffer);
-	engine->buffer = ring;
-
-	err = intel_engine_init_common(engine);
-	if (err)
-		goto err_unpin;
-
-	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
-
-	return 0;
-
-err_unpin:
-	intel_ring_unpin(ring);
-err_ring:
-	intel_ring_put(ring);
-err:
-	intel_engine_cleanup_common(engine);
-	return err;
-}
-
 void intel_engine_cleanup(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -2166,24 +2118,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
 	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
-static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
-				struct intel_engine_cs *engine)
-{
-	if (INTEL_GEN(dev_priv) >= 6) {
-		engine->irq_enable = gen6_irq_enable;
-		engine->irq_disable = gen6_irq_disable;
-	} else if (INTEL_GEN(dev_priv) >= 5) {
-		engine->irq_enable = gen5_irq_enable;
-		engine->irq_disable = gen5_irq_disable;
-	} else if (INTEL_GEN(dev_priv) >= 3) {
-		engine->irq_enable = i9xx_irq_enable;
-		engine->irq_disable = i9xx_irq_disable;
-	} else {
-		engine->irq_enable = i8xx_irq_enable;
-		engine->irq_disable = i8xx_irq_disable;
-	}
-}
-
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = i9xx_submit_request;
@@ -2199,13 +2133,33 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
-static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
-				      struct intel_engine_cs *engine)
+static void setup_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (INTEL_GEN(i915) >= 6) {
+		engine->irq_enable = gen6_irq_enable;
+		engine->irq_disable = gen6_irq_disable;
+	} else if (INTEL_GEN(i915) >= 5) {
+		engine->irq_enable = gen5_irq_enable;
+		engine->irq_disable = gen5_irq_disable;
+	} else if (INTEL_GEN(i915) >= 3) {
+		engine->irq_enable = i9xx_irq_enable;
+		engine->irq_disable = i9xx_irq_disable;
+	} else {
+		engine->irq_enable = i8xx_irq_enable;
+		engine->irq_disable = i8xx_irq_disable;
+	}
+}
+
+static void setup_common(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *i915 = engine->i915;
+
 	/* gen8+ are only supported with execlists */
-	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
 
-	intel_ring_init_irq(dev_priv, engine);
+	setup_irq(engine);
 
 	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
@@ -2221,117 +2175,96 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	 * engine->emit_init_breadcrumb().
 	 */
 	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
-	if (IS_GEN(dev_priv, 5))
+	if (IS_GEN(i915, 5))
 		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-	if (INTEL_GEN(dev_priv) >= 6)
+	if (INTEL_GEN(i915) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
-	else if (INTEL_GEN(dev_priv) >= 4)
+	else if (INTEL_GEN(i915) >= 4)
 		engine->emit_bb_start = i965_emit_bb_start;
-	else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
+	else if (IS_I830(i915) || IS_I845G(i915))
 		engine->emit_bb_start = i830_emit_bb_start;
 	else
 		engine->emit_bb_start = i915_emit_bb_start;
 }
 
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
+static void setup_rcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (HAS_L3_DPF(dev_priv))
+	if (HAS_L3_DPF(i915))
 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 
-	if (INTEL_GEN(dev_priv) >= 7) {
+	if (INTEL_GEN(i915) >= 7) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
 		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
-	} else if (IS_GEN(dev_priv, 6)) {
+	} else if (IS_GEN(i915, 6)) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen6_render_ring_flush;
 		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
-	} else if (IS_GEN(dev_priv, 5)) {
+	} else if (IS_GEN(i915, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
-		if (INTEL_GEN(dev_priv) < 4)
+		if (INTEL_GEN(i915) < 4)
 			engine->emit_flush = gen2_render_ring_flush;
 		else
 			engine->emit_flush = gen4_render_ring_flush;
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
-	if (IS_HASWELL(dev_priv))
+	if (IS_HASWELL(i915))
 		engine->emit_bb_start = hsw_emit_bb_start;
 
 	engine->resume = rcs_resume;
-
-	ret = intel_init_ring_buffer(engine);
-	if (ret)
-		return ret;
-
-	return 0;
 }
 
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
+static void setup_vcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(i915) >= 6) {
 		/* gen6 bsd needs a special wa for tail updates */
-		if (IS_GEN(dev_priv, 6))
+		if (IS_GEN(i915, 6))
 			engine->set_default_submission = gen6_bsd_set_default_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
-		if (IS_GEN(dev_priv, 6))
+		if (IS_GEN(i915, 6))
 			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 		else
 			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
-		if (IS_GEN(dev_priv, 5))
+		if (IS_GEN(i915, 5))
 			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
 		else
 			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
 	}
-
-	return intel_init_ring_buffer(engine);
 }
 
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
+static void setup_bcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
-	if (IS_GEN(dev_priv, 6))
+	if (IS_GEN(i915, 6))
 		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 	else
 		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-
-	return intel_init_ring_buffer(engine);
 }
 
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
+static void setup_vecs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+	struct drm_i915_private *i915 = engine->i915;
 
-	intel_ring_default_vfuncs(dev_priv, engine);
+	GEM_BUG_ON(INTEL_GEN(i915) < 7);
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
@@ -2339,6 +2272,73 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_disable = hsw_vebox_irq_disable;
 
 	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+}
+
+int intel_ring_submission_setup(struct intel_engine_cs *engine)
+{
+	setup_common(engine);
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		setup_rcs(engine);
+		break;
+	case VIDEO_DECODE_CLASS:
+		setup_vcs(engine);
+		break;
+	case COPY_ENGINE_CLASS:
+		setup_bcs(engine);
+		break;
+	case VIDEO_ENHANCEMENT_CLASS:
+		setup_vecs(engine);
+		break;
+	default:
+		MISSING_CASE(engine->class);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+int intel_ring_submission_init(struct intel_engine_cs *engine)
+{
+	struct i915_timeline *timeline;
+	struct intel_ring *ring;
+	int err;
+
+	timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
+	if (IS_ERR(timeline)) {
+		err = PTR_ERR(timeline);
+		goto err;
+	}
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
+
+	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
+	i915_timeline_put(timeline);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err;
+	}
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_ring;
 
-	return intel_init_ring_buffer(engine);
+	GEM_BUG_ON(engine->buffer);
+	engine->buffer = ring;
+
+	err = intel_engine_init_common(engine);
+	if (err)
+		goto err_unpin;
+
+	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+
+	return 0;
+
+err_unpin:
+	intel_ring_unpin(ring);
+err_ring:
+	intel_ring_put(ring);
+err:
+	intel_engine_cleanup_common(engine);
+	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 364696221fd7..5751446a4b0b 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1077,7 +1077,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_wa_list *w = &engine->whitelist;
 
-	GEM_BUG_ON(engine->id != RCS0);
+	if (engine->class != RENDER_CLASS)
+		return;
 
 	wa_init_start(w, "whitelist");
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 21f413ff5b8e..85cdbfe1d989 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -239,7 +239,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    int id)
 {
 	struct mock_engine *engine;
-	int err;
 
 	GEM_BUG_ON(id >= I915_NUM_ENGINES);
 
@@ -265,37 +264,44 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.reset.finish = mock_reset_finish;
 	engine->base.cancel_requests = mock_cancel_requests;
 
-	if (i915_timeline_init(i915, &engine->base.timeline, NULL))
-		goto err_free;
-	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
-
-	intel_engine_init_breadcrumbs(&engine->base);
-	intel_engine_init_execlists(&engine->base);
-	intel_engine_init__pm(&engine->base);
-
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
-	engine->base.kernel_context =
-		intel_context_instance(i915->kernel_context, &engine->base);
-	if (IS_ERR(engine->base.kernel_context))
+	return &engine->base;
+}
+
+int mock_engine_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	int err;
+
+	intel_engine_init_breadcrumbs(engine);
+	intel_engine_init_execlists(engine);
+	intel_engine_init__pm(engine);
+
+	if (i915_timeline_init(i915, &engine->timeline, NULL))
 		goto err_breadcrumbs;
+	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
+
+	engine->kernel_context =
+		intel_context_instance(i915->kernel_context, engine);
+	if (IS_ERR(engine->kernel_context))
+		goto err_timeline;
 
-	err = intel_context_pin(engine->base.kernel_context);
-	intel_context_put(engine->base.kernel_context);
+	err = intel_context_pin(engine->kernel_context);
+	intel_context_put(engine->kernel_context);
 	if (err)
-		goto err_breadcrumbs;
+		goto err_timeline;
 
-	return &engine->base;
+	return 0;
 
+err_timeline:
+	i915_timeline_fini(&engine->timeline);
 err_breadcrumbs:
-	intel_engine_fini_breadcrumbs(&engine->base);
-	i915_timeline_fini(&engine->base.timeline);
-err_free:
-	kfree(engine);
-	return NULL;
+	intel_engine_fini_breadcrumbs(engine);
+	return -ENOMEM;
 }
 
 void mock_engine_flush(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
index 44b35a85e9d1..3f9b698c49d2 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.h
+++ b/drivers/gpu/drm/i915/gt/mock_engine.h
@@ -42,6 +42,8 @@ struct mock_engine {
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    const char *name,
 				    int id);
+int mock_engine_init(struct intel_engine_cs *engine);
+
 void mock_engine_flush(struct intel_engine_cs *engine);
 void mock_engine_reset(struct intel_engine_cs *engine);
 void mock_engine_free(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0211271f103..f80b181746f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4540,6 +4540,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_ggtt;
 	}
 
+	ret = intel_engines_setup(dev_priv);
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
+		goto err_unlock;
+	}
+
 	ret = i915_gem_contexts_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index c072424c6b7c..e4033d0576c4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -209,12 +209,16 @@ struct drm_i915_private *mock_gem_device(void)
 	mock_init_ggtt(i915, &i915->ggtt);
 
 	mkwrite_device_info(i915)->engine_mask = BIT(0);
-	i915->kernel_context = mock_context(i915, NULL);
-	if (!i915->kernel_context)
-		goto err_unlock;
 
 	i915->engine[RCS0] = mock_engine(i915, "mock", RCS0);
 	if (!i915->engine[RCS0])
+		goto err_unlock;
+
+	i915->kernel_context = mock_context(i915, NULL);
+	if (!i915->kernel_context)
+		goto err_engine;
+
+	if (mock_engine_init(i915->engine[RCS0]))
 		goto err_context;
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -225,6 +229,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 err_context:
 	i915_gem_contexts_fini(i915);
+err_engine:
+	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_timelines_fini(i915);
-- 
2.20.1


* [PATCH 20/32] drm/i915: Switch back to an array of logical per-engine HW contexts
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (17 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 19/32] drm/i915: Split engine setup/init into two phases Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 21/32] drm/i915: Remove intel_context.active_link Chris Wilson
                   ` (15 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

We switched to a tree of per-engine HW contexts to accommodate the
introduction of virtual engines. However, we plan to also support
multiple instances of the same engine within the GEM context, defeating
our use of the engine as a key for looking up the HW context. Just
allocate a logical per-engine instance and always use an index into
ctx->engines[]. Later on, this ctx->engines[] may be replaced by a
user-specified map.

v2: Add for_each_gem_engine() helper to iterate within the engines lock
v3: intel_context_create_request() helper
v4: s/unsigned long/unsigned int/ 4 billion engines is quite enough.
v5: Push iterator locking to caller
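
For illustration, the iterator usage under the engines lock (as in the
context_barrier_task() hunk below):

	struct i915_gem_engines_iter it;
	struct intel_context *ce;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}
		/* ... emit per-engine work ... */
	}
	i915_gem_context_unlock_engines(ctx);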

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 112 ++++--------------
 drivers/gpu/drm/i915/gt/intel_context.h       |  27 +----
 drivers/gpu/drm/i915/gt/intel_context_types.h |   2 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   2 +-
 drivers/gpu/drm/i915/i915_gem.c               |  24 ++--
 drivers/gpu/drm/i915/i915_gem_context.c       |  99 ++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_context.h       |  58 +++++++++
 drivers/gpu/drm/i915/i915_gem_context_types.h |  40 ++++++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  70 +++++------
 drivers/gpu/drm/i915/i915_perf.c              |  80 +++++++------
 drivers/gpu/drm/i915/i915_request.c           |  15 +--
 drivers/gpu/drm/i915/intel_guc_submission.c   |  22 ++--
 .../gpu/drm/i915/selftests/i915_gem_context.c |   2 +-
 drivers/gpu/drm/i915/selftests/mock_context.c |  14 ++-
 16 files changed, 331 insertions(+), 241 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 15ac99c5dd4a..5e506e648454 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -17,7 +17,7 @@ static struct i915_global_context {
 	struct kmem_cache *slab_ce;
 } global;
 
-struct intel_context *intel_context_alloc(void)
+static struct intel_context *intel_context_alloc(void)
 {
 	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
 }
@@ -28,104 +28,17 @@ void intel_context_free(struct intel_context *ce)
 }
 
 struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
+intel_context_create(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine)
 {
-	struct intel_context *ce = NULL;
-	struct rb_node *p;
-
-	spin_lock(&ctx->hw_contexts_lock);
-	p = ctx->hw_contexts.rb_node;
-	while (p) {
-		struct intel_context *this =
-			rb_entry(p, struct intel_context, node);
-
-		if (this->engine == engine) {
-			GEM_BUG_ON(this->gem_context != ctx);
-			ce = this;
-			break;
-		}
-
-		if (this->engine < engine)
-			p = p->rb_right;
-		else
-			p = p->rb_left;
-	}
-	spin_unlock(&ctx->hw_contexts_lock);
-
-	return ce;
-}
-
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine,
-		       struct intel_context *ce)
-{
-	struct rb_node **p, *parent;
-	int err = 0;
-
-	spin_lock(&ctx->hw_contexts_lock);
-
-	parent = NULL;
-	p = &ctx->hw_contexts.rb_node;
-	while (*p) {
-		struct intel_context *this;
-
-		parent = *p;
-		this = rb_entry(parent, struct intel_context, node);
-
-		if (this->engine == engine) {
-			err = -EEXIST;
-			ce = this;
-			break;
-		}
-
-		if (this->engine < engine)
-			p = &parent->rb_right;
-		else
-			p = &parent->rb_left;
-	}
-	if (!err) {
-		rb_link_node(&ce->node, parent, p);
-		rb_insert_color(&ce->node, &ctx->hw_contexts);
-	}
-
-	spin_unlock(&ctx->hw_contexts_lock);
-
-	return ce;
-}
-
-void __intel_context_remove(struct intel_context *ce)
-{
-	struct i915_gem_context *ctx = ce->gem_context;
-
-	spin_lock(&ctx->hw_contexts_lock);
-	rb_erase(&ce->node, &ctx->hw_contexts);
-	spin_unlock(&ctx->hw_contexts_lock);
-}
-
-struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine)
-{
-	struct intel_context *ce, *pos;
-
-	ce = intel_context_lookup(ctx, engine);
-	if (likely(ce))
-		return intel_context_get(ce);
+	struct intel_context *ce;
 
 	ce = intel_context_alloc();
 	if (!ce)
 		return ERR_PTR(-ENOMEM);
 
 	intel_context_init(ce, ctx, engine);
-
-	pos = __intel_context_insert(ctx, engine, ce);
-	if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
-		intel_context_free(ce);
-
-	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
-	return intel_context_get(pos);
+	return ce;
 }
 
 int __intel_context_do_pin(struct intel_context *ce)
@@ -204,6 +117,8 @@ intel_context_init(struct intel_context *ce,
 		   struct i915_gem_context *ctx,
 		   struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(!engine->cops);
+
 	kref_init(&ce->ref);
 
 	ce->gem_context = ctx;
@@ -254,3 +169,18 @@ void intel_context_exit_engine(struct intel_context *ce)
 {
 	intel_engine_pm_put(ce->engine);
 }
+
+struct i915_request *intel_context_create_request(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err;
+
+	err = intel_context_pin(ce);
+	if (unlikely(err))
+		return ERR_PTR(err);
+
+	rq = i915_request_create(ce);
+	intel_context_unpin(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index b746add6b71d..63392c88cd98 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,24 +12,16 @@
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
 
-struct intel_context *intel_context_alloc(void);
-void intel_context_free(struct intel_context *ce);
-
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
 			struct intel_engine_cs *engine);
 
-/**
- * intel_context_lookup - Find the matching HW context for this (ctx, engine)
- * @ctx - the parent GEM context
- * @engine - the target HW engine
- *
- * May return NULL if the HW context hasn't been instantiated (i.e. unused).
- */
 struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
+intel_context_create(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine);
 
+void intel_context_free(struct intel_context *ce);
+
 /**
  * intel_context_lock_pinned - Stabilises the 'pinned' status of the HW context
  * @ce - the context
@@ -71,17 +63,6 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
 	mutex_unlock(&ce->pin_mutex);
 }
 
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine,
-		       struct intel_context *ce);
-void
-__intel_context_remove(struct intel_context *ce);
-
-struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine);
-
 int __intel_context_do_pin(struct intel_context *ce);
 
 static inline int intel_context_pin(struct intel_context *ce)
@@ -144,4 +125,6 @@ static inline void intel_context_timeline_unlock(struct intel_context *ce)
 	mutex_unlock(&ce->ring->timeline->mutex);
 }
 
+struct i915_request *intel_context_create_request(struct intel_context *ce);
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index f02d27734e3b..3579c2708321 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -10,7 +10,6 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
@@ -61,7 +60,6 @@ struct intel_context {
 	struct i915_active_request active_tracker;
 
 	const struct intel_context_ops *ops;
-	struct rb_node node;
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 6cb90137b2fa..2a1c438bf0ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx,
 	struct intel_context *ce;
 	int err;
 
-	ce = intel_context_instance(ctx, engine);
+	ce = i915_gem_context_get_engine(ctx, engine->id);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 85cdbfe1d989..2941916b37bf 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,6 +23,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_context.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 
@@ -286,7 +287,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
 
 	engine->kernel_context =
-		intel_context_instance(i915->kernel_context, engine);
+		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
 		goto err_timeline;
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 606fc2713240..8b6574e1b495 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1188,7 +1188,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&s->workload_q_head[i]);
 		s->shadow[i] = ERR_PTR(-EINVAL);
 
-		ce = intel_context_instance(ctx, engine);
+		ce = i915_gem_context_get_engine(ctx, i);
 		if (IS_ERR(ce)) {
 			ret = PTR_ERR(ce);
 			goto out_shadow_ctx;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f80b181746f3..f8287754c398 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4312,8 +4312,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 
 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
 	enum intel_engine_id id;
 	int err = 0;
 
@@ -4330,18 +4331,21 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	e = i915_gem_context_lock_engines(ctx);
+
 	for_each_engine(engine, i915, id) {
+		struct intel_context *ce = e->engines[id];
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			goto out_ctx;
+			goto err_active;
 		}
 
 		err = 0;
-		if (engine->init_context)
-			err = engine->init_context(rq);
+		if (rq->engine->init_context)
+			err = rq->engine->init_context(rq);
 
 		i915_request_add(rq);
 		if (err)
@@ -4355,15 +4359,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-		struct i915_vma *state;
+		struct intel_context *ce = e->engines[id];
+		struct i915_vma *state = ce->state;
 		void *vaddr;
 
-		ce = intel_context_lookup(ctx, engine);
-		if (!ce)
-			continue;
-
-		state = ce->state;
 		if (!state)
 			continue;
 
@@ -4419,6 +4418,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 out_ctx:
+	i915_gem_context_unlock_engines(ctx);
 	i915_gem_context_set_closed(ctx);
 	i915_gem_context_put(ctx);
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 1e1770047cc2..a1e1874742a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -150,7 +150,7 @@ lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
 	if (!engine)
 		return ERR_PTR(-EINVAL);
 
-	return intel_context_instance(ctx, engine);
+	return i915_gem_context_get_engine(ctx, engine->id);
 }
 
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
@@ -242,10 +242,51 @@ static void release_hw_id(struct i915_gem_context *ctx)
 	mutex_unlock(&i915->contexts.mutex);
 }
 
-static void i915_gem_context_free(struct i915_gem_context *ctx)
+static void __free_engines(struct i915_gem_engines *e, unsigned int count)
+{
+	while (count--) {
+		if (!e->engines[count])
+			continue;
+
+		intel_context_put(e->engines[count]);
+	}
+	kfree(e);
+}
+
+static void free_engines(struct i915_gem_engines *e)
+{
+	__free_engines(e, e->num_engines);
+}
+
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 {
-	struct intel_context *it, *n;
+	struct intel_engine_cs *engine;
+	struct i915_gem_engines *e;
+	enum intel_engine_id id;
+
+	e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	e->i915 = ctx->i915;
+	for_each_engine(engine, ctx->i915, id) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(ctx, engine);
+		if (IS_ERR(ce)) {
+			__free_engines(e, id);
+			return ERR_CAST(ce);
+		}
+
+		e->engines[id] = ce;
+	}
+	e->num_engines = id;
+
+	return e;
+}
 
+static void i915_gem_context_free(struct i915_gem_context *ctx)
+{
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 	GEM_BUG_ON(!list_empty(&ctx->active_engines));
@@ -253,8 +294,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
-	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
-		intel_context_put(it);
+	free_engines(rcu_access_pointer(ctx->engines));
+	mutex_destroy(&ctx->engines_mutex);
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -363,6 +404,8 @@ static struct i915_gem_context *
 __create_context(struct drm_i915_private *dev_priv)
 {
 	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
+	int err;
 	int i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -376,8 +419,13 @@ __create_context(struct drm_i915_private *dev_priv)
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	ctx->hw_contexts = RB_ROOT;
-	spin_lock_init(&ctx->hw_contexts_lock);
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e)) {
+		err = PTR_ERR(e);
+		goto err_free;
+	}
+	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -399,6 +447,10 @@ __create_context(struct drm_i915_private *dev_priv)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
 	return ctx;
+
+err_free:
+	kfree(ctx);
+	return ERR_PTR(err);
 }
 
 static struct i915_hw_ppgtt *
@@ -862,7 +914,8 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
-	struct intel_context *ce, *next;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 	int err = 0;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -875,20 +928,22 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_init(i915, &cb->base, cb_retire);
 	i915_active_acquire(&cb->base);
 
-	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
-		struct intel_engine_cs *engine = ce->engine;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		struct i915_request *rq;
 
-		if (!(engine->mask & engines))
+		if (!(ce->engine->mask & engines))
+			continue;
+
+		if (!intel_context_is_pinned(ce))
 			continue;
 
 		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
-				       engine->mask)) {
+				       ce->engine->mask)) {
 			err = -ENXIO;
 			break;
 		}
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
@@ -904,6 +959,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (err)
 			break;
 	}
+	i915_gem_context_unlock_engines(ctx);
 
 	cb->task = err ? NULL : task; /* caller needs to unwind instead */
 	cb->data = data;
@@ -1739,6 +1795,23 @@ int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
 	return err;
 }
 
+/* GEM context-engines iterator: for_each_gem_engine() */
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
+{
+	const struct i915_gem_engines *e = it->engines;
+	struct intel_context *ctx;
+
+	do {
+		if (it->idx >= e->num_engines)
+			return NULL;
+
+		ctx = e->engines[it->idx++];
+	} while (!ctx);
+
+	return ctx;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_context.c"
 #include "selftests/i915_gem_context.c"
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5a8e080499fb..272e183ebc0c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -176,6 +176,64 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_release);
 }
 
+static inline struct i915_gem_engines *
+i915_gem_context_engines(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines,
+					 lockdep_is_held(&ctx->engines_mutex));
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_lock_engines(struct i915_gem_context *ctx)
+	__acquires(&ctx->engines_mutex)
+{
+	mutex_lock(&ctx->engines_mutex);
+	return i915_gem_context_engines(ctx);
+}
+
+static inline void
+i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
+	__releases(&ctx->engines_mutex)
+{
+	mutex_unlock(&ctx->engines_mutex);
+}
+
+static inline struct intel_context *
+i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	return i915_gem_context_engines(ctx)->engines[idx];
+}
+
+static inline struct intel_context *
+i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	struct intel_context *ce = ERR_PTR(-EINVAL);
+
+	rcu_read_lock(); {
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))
+			ce = intel_context_get(e->engines[idx]);
+	} rcu_read_unlock();
+
+	return ce;
+}
+
+static inline void
+i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
+			   struct i915_gem_engines *engines)
+{
+	GEM_BUG_ON(!engines);
+	it->engines = engines;
+	it->idx = 0;
+}
+
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
+
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)
+
 struct i915_lut_handle *i915_lut_handle_alloc(void);
 void i915_lut_handle_free(struct i915_lut_handle *lut);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index d282a6ab3b9f..5f84618cf7db 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -29,6 +29,18 @@ struct i915_hw_ppgtt;
 struct i915_timeline;
 struct intel_ring;
 
+struct i915_gem_engines {
+	struct rcu_work rcu;
+	struct drm_i915_private *i915;
+	unsigned int num_engines;
+	struct intel_context *engines[];
+};
+
+struct i915_gem_engines_iter {
+	unsigned int idx;
+	const struct i915_gem_engines *engines;
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -42,6 +54,30 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	/**
+	 * @engines: User defined engines for this context
+	 *
+	 * Various uAPI offer the ability to look up an
+	 * index from this array to select an engine to operate on.
+	 *
+	 * Multiple logically distinct instances of the same engine
+	 * may be defined in the array, as well as composite virtual
+	 * engines.
+	 *
+	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
+	 * array to select which HW context + engine to execute on. For
+	 * the default array, the user_ring_map[] is used to translate
+	 * the legacy uABI onto the appropriate index (e.g. both
+	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
+	 * context, and I915_EXEC_BSD is weird). For a user defined
+	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
+	 *
+	 * User defined by I915_CONTEXT_PARAM_ENGINE (when the
+	 * CONTEXT_USER_ENGINES flag is set).
+	 */
+	struct i915_gem_engines __rcu *engines;
+	struct mutex engines_mutex; /* guards writes to engines */
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -134,10 +170,6 @@ struct i915_gem_context {
 
 	struct i915_sched_attr sched;
 
-	/** hw_contexts: per-engine logical HW state */
-	struct rb_root hw_contexts;
-	spinlock_t hw_contexts_lock;
-
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
 	/** desc_template: invariant fields for the HW context descriptor */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 166a33c0d3ed..679f7c1561ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2076,9 +2076,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 	return file_priv->bsd_engine;
 }
 
-#define I915_USER_RINGS (4)
-
-static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
+static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_DEFAULT]	= RCS0,
 	[I915_EXEC_RENDER]	= RCS0,
 	[I915_EXEC_BLT]		= BCS0,
@@ -2086,10 +2084,8 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb,
-			  struct intel_engine_cs *engine)
+static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
-	struct intel_context *ce;
 	int err;
 
 	/*
@@ -2100,21 +2096,16 @@ static int eb_pin_context(struct i915_execbuffer *eb,
 	if (err)
 		return err;
 
-	ce = intel_context_instance(eb->gem_context, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
 	err = intel_context_pin(ce);
-	intel_context_put(ce);
 	if (err)
 		return err;
 
-	eb->engine = engine;
+	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 }
@@ -2124,25 +2115,19 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
 	intel_context_unpin(eb->context);
 }
 
-static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+static unsigned int
+eb_select_legacy_ring(struct i915_execbuffer *eb,
+		      struct drm_file *file,
+		      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct drm_i915_private *i915 = eb->i915;
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
-	struct intel_engine_cs *engine;
-
-	if (user_ring_id > I915_USER_RINGS) {
-		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
-		return -EINVAL;
-	}
 
-	if ((user_ring_id != I915_EXEC_BSD) &&
-	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
+	if (user_ring_id != I915_EXEC_BSD &&
+	    (args->flags & I915_EXEC_BSD_MASK)) {
 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
 			  "bsd dispatch flags: %d\n", (int)(args->flags));
-		return -EINVAL;
+		return -1;
 	}
 
 	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
@@ -2157,20 +2142,39 @@ eb_select_engine(struct i915_execbuffer *eb,
 		} else {
 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
 				  bsd_idx);
-			return -EINVAL;
+			return -1;
 		}
 
-		engine = i915->engine[_VCS(bsd_idx)];
-	} else {
-		engine = i915->engine[user_ring_map[user_ring_id]];
+		return _VCS(bsd_idx);
 	}
 
-	if (!engine) {
-		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
-		return -EINVAL;
+	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
+		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
+		return -1;
 	}
 
-	return eb_pin_context(eb, engine);
+	return user_ring_map[user_ring_id];
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb,
+		 struct drm_file *file,
+		 struct drm_i915_gem_execbuffer2 *args)
+{
+	struct intel_context *ce;
+	unsigned int idx;
+	int err;
+
+	idx = eb_select_legacy_ring(eb, file, args);
+
+	ce = i915_gem_context_get_engine(eb->gem_context, idx);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = eb_pin_context(eb, ce);
+	intel_context_put(ce);
+
+	return err;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index afaeabe5e531..c4995d5a16d2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1203,35 +1203,35 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 					    struct i915_gem_context *ctx)
 {
-	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err;
 
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
 	err = i915_mutex_lock_interruptible(&i915->drm);
-	if (err) {
-		intel_context_put(ce);
+	if (err)
 		return ERR_PTR(err);
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (ce->engine->class != RENDER_CLASS)
+			continue;
+
+		/*
+		 * As the ID is the gtt offset of the context's vma we
+		 * pin the vma to ensure the ID remains fixed.
+		 */
+		err = intel_context_pin(ce);
+		if (err == 0) {
+			i915->perf.oa.pinned_ctx = ce;
+			break;
+		}
 	}
+	i915_gem_context_unlock_engines(ctx);
 
-	/*
-	 * As the ID is the gtt offset of the context's vma we
-	 * pin the vma to ensure the ID remains fixed.
-	 *
-	 * NB: implied RCS engine...
-	 */
-	err = intel_context_pin(ce);
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_context_put(ce);
 	if (err)
 		return ERR_PTR(err);
 
-	i915->perf.oa.pinned_ctx = ce;
-
-	return ce;
+	return i915->perf.oa.pinned_ctx;
 }
 
 /**
@@ -1717,7 +1717,6 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 				       const struct i915_oa_config *oa_config)
 {
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
 	unsigned int map_type = i915_coherent_map_type(dev_priv);
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
@@ -1746,30 +1745,43 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-		struct intel_context *ce = intel_context_lookup(ctx, engine);
-		u32 *regs;
-
-		/* OA settings will be set upon first use */
-		if (!ce || !ce->state)
-			continue;
-
-		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
-		if (IS_ERR(regs))
-			return PTR_ERR(regs);
+		struct i915_gem_engines_iter it;
+		struct intel_context *ce;
+
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx),
+				    it) {
+			u32 *regs;
+
+			if (ce->engine->class != RENDER_CLASS)
+				continue;
+
+			/* OA settings will be set upon first use */
+			if (!ce->state)
+				continue;
+
+			regs = i915_gem_object_pin_map(ce->state->obj,
+						       map_type);
+			if (IS_ERR(regs)) {
+				i915_gem_context_unlock_engines(ctx);
+				return PTR_ERR(regs);
+			}
 
-		ce->state->obj->mm.dirty = true;
-		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+			ce->state->obj->mm.dirty = true;
+			regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
 
-		gen8_update_reg_state_unlocked(ce, regs, oa_config);
+			gen8_update_reg_state_unlocked(ce, regs, oa_config);
 
-		i915_gem_object_unpin_map(ce->state->obj);
+			i915_gem_object_unpin_map(ce->state->obj);
+		}
+		i915_gem_context_unlock_engines(ctx);
 	}
 
 	/*
 	 * Apply the configuration by doing one context restore of the edited
 	 * context image.
 	 */
-	rq = i915_request_create(engine->kernel_context);
+	rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 975491f763df..dd7e1d4f88cd 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -786,7 +786,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	struct drm_i915_private *i915 = engine->i915;
 	struct intel_context *ce;
 	struct i915_request *rq;
-	int err;
 
 	/*
 	 * Preempt contexts are reserved for exclusive use to inject a
@@ -800,21 +799,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ce = intel_context_instance(ctx, engine);
+	ce = i915_gem_context_get_engine(ctx, engine->id);
 	if (IS_ERR(ce))
 		return ERR_CAST(ce);
 
-	err = intel_context_pin(ce);
-	if (err) {
-		rq = ERR_PTR(err);
-		goto err_put;
-	}
-
-	rq = i915_request_create(ce);
-	intel_context_unpin(ce);
-
-err_put:
+	rq = intel_context_create_request(ce);
 	intel_context_put(ce);
+
 	return rq;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 1b6d6403ee92..4c814344809c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -364,11 +364,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
 static void guc_stage_desc_init(struct intel_guc_client *client)
 {
 	struct intel_guc *guc = client->guc;
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx = client->owner;
+	struct i915_gem_engines_iter it;
 	struct guc_stage_desc *desc;
-	unsigned int tmp;
+	struct intel_context *ce;
 	u32 gfx_addr;
 
 	desc = __get_stage_desc(client);
@@ -382,10 +381,11 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 	desc->priority = client->priority;
 	desc->db_id = client->doorbell_id;
 
-	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = intel_context_lookup(ctx, engine);
-		u32 guc_engine_id = engine->guc_id;
-		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		struct guc_execlist_context *lrc;
+
+		if (!(ce->engine->mask & client->engines))
+			continue;
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -394,7 +394,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * for now who owns a GuC client. But for future owner of GuC
 		 * client, need to make sure lrc is pinned prior to enter here.
 		 */
-		if (!ce || !ce->state)
+		if (!ce->state)
 			break;	/* XXX: continue? */
 
 		/*
@@ -404,6 +404,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * Instead, the GuC uses the LRCA of the user mode context (see
 		 * guc_add_request below).
 		 */
+		lrc = &desc->lrc[ce->engine->guc_id];
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 
 		/* The state page is after PPHWSP */
@@ -414,15 +415,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * here. In proxy submission, it wants the stage id
 		 */
 		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
-				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
+				(ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
 		lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
 		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc->engines_used |= (1 << guc_engine_id);
+		desc->engines_used |= BIT(ce->engine->guc_id);
 	}
+	i915_gem_context_unlock_engines(ctx);
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
 			 client->engines, desc->engines_used);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 214d1fd2f4dc..7fd224a4ca4c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1094,7 +1094,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(i915);
 
-	ce = intel_context_instance(ctx, i915->engine[RCS0]);
+	ce = i915_gem_context_get_engine(ctx, RCS0);
 	if (IS_ERR(ce)) {
 		ret = PTR_ERR(ce);
 		goto out_rpm;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 0426093bf1d9..71c750693585 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -40,8 +41,11 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->hw_contexts = RB_ROOT;
-	spin_lock_init(&ctx->hw_contexts_lock);
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e))
+		goto err_free;
+	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -51,7 +55,7 @@ mock_context(struct drm_i915_private *i915,
 
 	ret = i915_gem_context_pin_hw_id(ctx);
 	if (ret < 0)
-		goto err_handles;
+		goto err_engines;
 
 	if (name) {
 		struct i915_hw_ppgtt *ppgtt;
@@ -69,7 +73,9 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_handles:
+err_engines:
+	free_engines(rcu_access_pointer(ctx->engines));
+err_free:
 	kfree(ctx);
 	return NULL;
 
-- 
2.20.1
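
For reference, a minimal sketch of how the engines iterator introduced
above is meant to be used by callers (cf. the i915_perf.c hunk); this is
illustrative only and not part of the patch:

	struct i915_gem_engines_iter it;
	struct intel_context *ce;

	/* Takes ctx->engines_mutex and returns the current engines array. */
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		/* NULL slots in ctx->engines[] are skipped by the iterator. */
		if (ce->engine->class != RENDER_CLASS)
			continue;

		/* ... operate on the per-engine logical state ... */
	}
	i915_gem_context_unlock_engines(ctx); /* drops ctx->engines_mutex */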


* [PATCH 21/32] drm/i915: Remove intel_context.active_link
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (18 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 20/32] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  9:47   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 22/32] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
                   ` (14 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

We no longer need to track the active intel_contexts within each GEM
context, allowing us to drop a tricky mutex_lock from inside unpin
(which may occur inside fs_reclaim).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.c           | 11 +----------
 drivers/gpu/drm/i915/gt/intel_context_types.h     |  1 -
 drivers/gpu/drm/i915/i915_debugfs.c               | 11 +++++++++--
 drivers/gpu/drm/i915/i915_gem_context.c           |  2 --
 drivers/gpu/drm/i915/i915_gem_context_types.h     |  1 -
 drivers/gpu/drm/i915/selftests/i915_gem_context.c |  1 -
 drivers/gpu/drm/i915/selftests/mock_context.c     |  1 -
 7 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5e506e648454..1f1761fc6597 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -49,7 +49,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 		return -EINTR;
 
 	if (likely(!atomic_read(&ce->pin_count))) {
-		struct i915_gem_context *ctx = ce->gem_context;
 		intel_wakeref_t wakeref;
 
 		err = 0;
@@ -58,11 +57,7 @@ int __intel_context_do_pin(struct intel_context *ce)
 		if (err)
 			goto err;
 
-		i915_gem_context_get(ctx);
-
-		mutex_lock(&ctx->mutex);
-		list_add(&ce->active_link, &ctx->active_engines);
-		mutex_unlock(&ctx->mutex);
+		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
 
 		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
@@ -91,10 +86,6 @@ void intel_context_unpin(struct intel_context *ce)
 	if (likely(atomic_dec_and_test(&ce->pin_count))) {
 		ce->ops->unpin(ce);
 
-		mutex_lock(&ce->gem_context->mutex);
-		list_del(&ce->active_link);
-		mutex_unlock(&ce->gem_context->mutex);
-
 		i915_gem_context_put(ce->gem_context);
 		intel_context_put(ce);
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 3579c2708321..d5a7dbd0daee 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -38,7 +38,6 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *active;
 
-	struct list_head active_link;
 	struct list_head signal_link;
 	struct list_head signals;
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 00d3ff746eb1..466becbb99c6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -34,6 +34,7 @@
 
 #include "gt/intel_reset.h"
 
+#include "i915_gem_context.h"
 #include "intel_dp.h"
 #include "intel_drv.h"
 #include "intel_fbc.h"
@@ -396,14 +397,17 @@ static void print_context_stats(struct seq_file *m,
 	struct i915_gem_context *ctx;
 
 	list_for_each_entry(ctx, &i915->contexts.list, link) {
+		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
-		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
 			if (ce->state)
 				per_file_stats(0, ce->state->obj, &kstats);
 			if (ce->ring)
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
 		}
+		i915_gem_context_unlock_engines(ctx);
 
 		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
 			struct file_stats stats = { .vm = &ctx->ppgtt->vm, };
@@ -1893,6 +1897,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
@@ -1917,7 +1922,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, ctx->remap_slice ? 'R' : 'r');
 		seq_putc(m, '\n');
 
-		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
 			seq_printf(m, "%s: ", ce->engine->name);
 			if (ce->state)
 				describe_obj(m, ce->state->obj);
@@ -1925,6 +1931,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 				describe_ctx_ring(m, ce->ring);
 			seq_putc(m, '\n');
 		}
+		i915_gem_context_unlock_engines(ctx);
 
 		seq_putc(m, '\n');
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a1e1874742a3..5b9feeb8d006 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -289,7 +289,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
-	GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
@@ -416,7 +415,6 @@ __create_context(struct drm_i915_private *dev_priv)
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
-	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
 	mutex_init(&ctx->engines_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 5f84618cf7db..d5cb4f121aad 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -165,7 +165,6 @@ struct i915_gem_context {
 	atomic_t hw_id_pin_count;
 	struct list_head hw_id_link;
 
-	struct list_head active_engines;
 	struct mutex mutex;
 
 	struct i915_sched_attr sched;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 7fd224a4ca4c..deedd1898fe5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1675,7 +1675,6 @@ static int mock_context_barrier(void *arg)
 		goto out;
 	}
 	i915_request_add(rq);
-	GEM_BUG_ON(list_empty(&ctx->active_engines));
 
 	counter = 0;
 	context_barrier_inject_fault = BIT(RCS0);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 71c750693585..10e67c931ed1 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -50,7 +50,6 @@ mock_context(struct drm_i915_private *i915,
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
-	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
 	ret = i915_gem_context_pin_hw_id(ctx);
-- 
2.20.1


* [PATCH 22/32] drm/i915: Move i915_request_alloc into selftests/
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (19 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 21/32] drm/i915: Remove intel_context.active_link Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 23/32] drm/i915: Allow multiple user handles to the same VM Chris Wilson
                   ` (13 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Having transitioned GEM over to using intel_context as its primary means
of tracking the GEM context and engine combined, and to using
i915_request_create(), we can move the older i915_request_alloc()
helper function into selftests/, where its remaining users are confined.
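
For reference, the replacement is a straight substitution; a sketch with
only the essential error handling (ctx and engine as in the old helper):

	struct intel_context *ce;
	struct i915_request *rq;

	/* Lookup and reference the logical context for this engine. */
	ce = i915_gem_context_get_engine(ctx, engine->id);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	/* Wraps the intel_context pin/unpin around i915_request_create(). */
	rq = intel_context_create_request(ce);
	intel_context_put(ce);

	return rq;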

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  9 +++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 13 ++++---
 .../gpu/drm/i915/gt/selftest_workarounds.c    | 15 +++-----
 drivers/gpu/drm/i915/i915_request.c           | 38 -------------------
 drivers/gpu/drm/i915/i915_request.h           |  3 --
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  5 ++-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 13 ++++---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  4 +-
 .../gpu/drm/i915/selftests/igt_gem_utils.c    | 34 +++++++++++++++++
 .../gpu/drm/i915/selftests/igt_gem_utils.h    | 17 +++++++++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  3 +-
 drivers/gpu/drm/i915/selftests/mock_request.c |  3 +-
 15 files changed, 89 insertions(+), 75 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.c
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dd8d923aa1c6..58643373495c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -193,6 +193,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_flush_test.o \
+	selftests/igt_gem_utils.o \
 	selftests/igt_live_test.o \
 	selftests/igt_reset.o \
 	selftests/igt_spinner.o
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 6004d6907e9c..c206748629dc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -29,6 +29,7 @@
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_wedge_me.h"
 
@@ -175,7 +176,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 	if (err)
 		goto unpin_vma;
 
-	rq = i915_request_alloc(engine, h->ctx);
+	rq = igt_request_alloc(h->ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin_hws;
@@ -455,7 +456,7 @@ static int igt_reset_nop(void *arg)
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = i915_request_alloc(engine, ctx);
+				rq = igt_request_alloc(ctx, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -554,7 +555,7 @@ static int igt_reset_nop_engine(void *arg)
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = i915_request_alloc(engine, ctx);
+				rq = igt_request_alloc(ctx, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -800,7 +801,7 @@ static int active_engine(void *data)
 		struct i915_request *new;
 
 		mutex_lock(&engine->i915->drm.struct_mutex);
-		new = i915_request_alloc(engine, ctx[idx]);
+		new = igt_request_alloc(ctx[idx], engine);
 		if (IS_ERR(new)) {
 			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index cd0551f97c2f..84538f69185b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -10,6 +10,7 @@
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_live_test.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/mock_context.h"
@@ -148,7 +149,7 @@ static int live_busywait_preempt(void *arg)
 		 * fails, we hang instead.
 		 */
 
-		lo = i915_request_alloc(engine, ctx_lo);
+		lo = igt_request_alloc(ctx_lo, engine);
 		if (IS_ERR(lo)) {
 			err = PTR_ERR(lo);
 			goto err_vma;
@@ -192,7 +193,7 @@ static int live_busywait_preempt(void *arg)
 			goto err_vma;
 		}
 
-		hi = i915_request_alloc(engine, ctx_hi);
+		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
 			goto err_vma;
@@ -857,13 +858,13 @@ static int live_chain_preempt(void *arg)
 			i915_request_add(rq);
 
 			for (i = 0; i < count; i++) {
-				rq = i915_request_alloc(engine, lo.ctx);
+				rq = igt_request_alloc(lo.ctx, engine);
 				if (IS_ERR(rq))
 					goto err_wedged;
 				i915_request_add(rq);
 			}
 
-			rq = i915_request_alloc(engine, hi.ctx);
+			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
@@ -882,7 +883,7 @@ static int live_chain_preempt(void *arg)
 			}
 			igt_spinner_end(&lo.spin);
 
-			rq = i915_request_alloc(engine, lo.ctx);
+			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
@@ -1087,7 +1088,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
 
 	ctx->sched.priority = prio;
 
-	rq = i915_request_alloc(smoke->engine, ctx);
+	rq = igt_request_alloc(ctx, smoke->engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 461d91737077..0521c1fbcee1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -8,6 +8,7 @@
 #include "intel_reset.h"
 
 #include "selftests/igt_flush_test.h"
+#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/igt_wedge_me.h"
@@ -102,7 +103,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
@@ -511,7 +512,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		i915_gem_object_unpin_map(batch->obj);
 		i915_gem_chipset_flush(ctx->i915);
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = igt_request_alloc(ctx, engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out_batch;
@@ -701,14 +702,11 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 				      struct intel_engine_cs *engine,
 				      struct i915_vma *results)
 {
-	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	u32 srm, *cs;
 	int err, i;
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
-		rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -748,7 +746,6 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
 static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 				       struct intel_engine_cs *engine)
 {
-	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	int i, err;
@@ -774,9 +771,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 	i915_gem_object_flush_map(batch->obj);
 	i915_gem_chipset_flush(ctx->i915);
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
-		rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index dd7e1d4f88cd..46f4fc2a8840 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -771,44 +771,6 @@ i915_request_create(struct intel_context *ce)
 	return rq;
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_request *rq;
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
-
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = i915_gem_context_get_engine(ctx, engine->id);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	rq = intel_context_create_request(ce);
-	intel_context_put(ce);
-
-	return rq;
-}
-
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 1eee7416af31..8025a89b5999 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -246,9 +246,6 @@ i915_request_create(struct intel_context *ce);
 
 struct i915_request *__i915_request_commit(struct i915_request *request);
 
-struct i915_request * __must_check
-i915_request_alloc(struct intel_engine_cs *engine,
-		   struct i915_gem_context *ctx);
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 90721b54e7ae..1e1f83326a96 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -26,6 +26,7 @@
 
 #include <linux/prime_numbers.h>
 
+#include "igt_gem_utils.h"
 #include "mock_drm.h"
 #include "i915_random.h"
 
@@ -980,7 +981,7 @@ static int gpu_write(struct i915_vma *vma,
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 0342de369d3e..c6a9bff85311 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -8,8 +8,9 @@
 
 #include "../i915_selftest.h"
 
-#include "mock_context.h"
+#include "igt_gem_utils.h"
 #include "igt_flush_test.h"
+#include "mock_context.h"
 
 static int switch_to_context(struct drm_i915_private *i915,
 			     struct i915_gem_context *ctx)
@@ -20,7 +21,7 @@ static int switch_to_context(struct drm_i915_private *i915,
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = igt_request_alloc(ctx, engine);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index deedd1898fe5..b62f005e4d50 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -29,6 +29,7 @@
 
 #include "i915_random.h"
 #include "igt_flush_test.h"
+#include "igt_gem_utils.h"
 #include "igt_live_test.h"
 #include "igt_reset.h"
 #include "igt_spinner.h"
@@ -91,7 +92,7 @@ static int live_nop_switch(void *arg)
 
 		times[0] = ktime_get_raw();
 		for (n = 0; n < nctx; n++) {
-			rq = i915_request_alloc(engine, ctx[n]);
+			rq = igt_request_alloc(ctx[n], engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
 				goto out_unlock;
@@ -121,7 +122,7 @@ static int live_nop_switch(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
-				rq = i915_request_alloc(engine, ctx[n % nctx]);
+				rq = igt_request_alloc(ctx[n % nctx], engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					goto out_unlock;
@@ -301,7 +302,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 		goto err_vma;
 	}
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
@@ -1350,7 +1351,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_unpin;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1445,7 +1446,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_unpin;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1669,7 +1670,7 @@ static int mock_context_barrier(void *arg)
 		goto out;
 	}
 
-	rq = i915_request_alloc(i915->engine[RCS0], ctx);
+	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
 	if (IS_ERR(rq)) {
 		pr_err("Request allocation failed!\n");
 		goto out;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 89766688e420..4fc6e5445dd1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -24,6 +24,7 @@
 
 #include "../i915_selftest.h"
 
+#include "igt_gem_utils.h"
 #include "lib_sw_fence.h"
 #include "mock_context.h"
 #include "mock_drm.h"
@@ -460,7 +461,7 @@ static int igt_evict_contexts(void *arg)
 
 			/* We will need some GGTT space for the rq's context */
 			igt_evict_ctl.fail_if_busy = true;
-			rq = i915_request_alloc(engine, ctx);
+			rq = igt_request_alloc(ctx, engine);
 			igt_evict_ctl.fail_if_busy = false;
 
 			if (IS_ERR(rq)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 098d7b3aa131..b60591531e4a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -267,7 +267,7 @@ static struct i915_request *
 __live_request_alloc(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine)
 {
-	return i915_request_alloc(engine, ctx);
+	return igt_request_alloc(ctx, engine);
 }
 
 static int __igt_breadcrumbs_smoketest(void *arg)
@@ -1074,7 +1074,7 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (HAS_EXECLISTS(ctx->i915))
 		return INT_MAX;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 	} else {
diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/selftests/igt_gem_utils.c
new file mode 100644
index 000000000000..16891b1a3e50
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_gem_utils.c
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_gem_utils.h"
+
+#include "gt/intel_context.h"
+
+#include "../i915_gem_context.h"
+#include "../i915_gem_pm.h"
+#include "../i915_request.h"
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = i915_gem_context_get_engine(ctx, engine->id);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = intel_context_create_request(ce);
+	intel_context_put(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/selftests/igt_gem_utils.h
new file mode 100644
index 000000000000..0f17251cf75d
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_gem_utils.h
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __IGT_GEM_UTILS_H__
+#define __IGT_GEM_UTILS_H__
+
+struct i915_request;
+struct i915_gem_context;
+struct intel_engine_cs;
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+
+#endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 16890dfe74c0..ece8a8a0d3b0 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -4,6 +4,7 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "igt_gem_utils.h"
 #include "igt_spinner.h"
 
 int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915)
@@ -114,7 +115,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
 	if (err)
 		goto unpin_vma;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin_hws;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index f739ba63057f..b99f7576153c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -24,6 +24,7 @@
 
 #include "gt/mock_engine.h"
 
+#include "igt_gem_utils.h"
 #include "mock_request.h"
 
 struct i915_request *
@@ -34,7 +35,7 @@ mock_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 
 	/* NB the i915->requests slab cache is enlarged to fit mock_request */
-	request = i915_request_alloc(engine, context);
+	request = igt_request_alloc(context, engine);
 	if (IS_ERR(request))
 		return NULL;
 
-- 
2.20.1


* [PATCH 23/32] drm/i915: Allow multiple user handles to the same VM
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (20 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 22/32] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 24/32] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
                   ` (12 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

It was noted that we made the same mistake for VM_ID as for object
handles, whereby we ensured that we only allocated a single handle for
one ppgtt. This has the unfortunate consequence that userspace needs to
reference-count the handles to avoid destroying an active ID. If
we allow multiple handles to the same ppgtt, userspace can freely
unreference any handle it owns without fear of destroying the same
handle in use elsewhere.
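
Once the ABI is exposed (see the following patches), a hypothetical
userspace sketch of the new behaviour; drm_fd/ctx_id and the usual
boilerplate (<sys/ioctl.h>, <drm/i915_drm.h>) are assumed, as is the
vm_id field of drm_i915_gem_vm_control:

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_VM,
	};
	__u64 vm_a, vm_b;

	/* Each GETPARAM now allocates a fresh handle to the same ppgtt. */
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
	vm_a = p.value;
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
	vm_b = p.value; /* vm_b != vm_a; both name the same address space */

	/* Either handle may now be dropped without affecting the other. */
	struct drm_i915_gem_vm_control vm = { .vm_id = vm_a };
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &vm);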

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  2 --
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5b9feeb8d006..0d9d435ae398 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -834,8 +834,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (err < 0)
 		goto err_unlock;
 
-	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
-	ppgtt->user_handle = err;
+	GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */
 
 	mutex_unlock(&file_priv->vm_idr_lock);
 
@@ -873,10 +872,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
 		return err;
 
 	ppgtt = idr_remove(&file_priv->vm_idr, id);
-	if (ppgtt) {
-		GEM_BUG_ON(ppgtt->user_handle != id);
-		ppgtt->user_handle = 0;
-	}
 
 	mutex_unlock(&file_priv->vm_idr_lock);
 	if (!ppgtt)
@@ -991,18 +986,15 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	if (ret)
 		goto err_put;
 
-	if (!ppgtt->user_handle) {
-		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
-		GEM_BUG_ON(!ret);
-		if (ret < 0)
-			goto err_unlock;
+	ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+	GEM_BUG_ON(!ret);
+	if (ret < 0)
+		goto err_unlock;
 
-		ppgtt->user_handle = ret;
-		i915_ppgtt_get(ppgtt);
-	}
+	i915_ppgtt_get(ppgtt);
 
 	args->size = 0;
-	args->value = ppgtt->user_handle;
+	args->value = ret;
 
 	ret = 0;
 err_unlock:
@@ -1093,10 +1085,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 		return err;
 
 	ppgtt = idr_find(&file_priv->vm_idr, args->value);
-	if (ppgtt) {
-		GEM_BUG_ON(ppgtt->user_handle != args->value);
+	if (ppgtt)
 		i915_ppgtt_get(ppgtt);
-	}
 	mutex_unlock(&file_priv->vm_idr_lock);
 	if (!ppgtt)
 		return -ENOENT;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c8d96e91f3dc..4832bb5c5fc0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -396,8 +396,6 @@ struct i915_hw_ppgtt {
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
 		struct i915_page_directory pd;		/* GEN6-7 */
 	};
-
-	u32 user_handle;
 };
 
 struct gen6_hw_ppgtt {
-- 
2.20.1


* [PATCH 24/32] drm/i915: Restore control over ppgtt for context creation ABI
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (21 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 23/32] drm/i915: Allow multiple user handles to the same VM Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 25/32] drm/i915: Allow a context to define its set of engines Chris Wilson
                   ` (11 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Having hidden the partially exposed new ABI from the PR, put it back
again to complete context recovery. A significant part of context
recovery is the ability to reuse as much of the old context as is
feasible (to avoid expensive reconstruction). The biggest chunk kept
hidden at the moment is fine control over the ctx->ppgtt (the GPU page
tables and associated translation tables and kernel maps), so make
control over the ctx->ppgtt explicit.

This allows userspace to create and share virtual memory address spaces
(within the limits of a single fd) between contexts they own, along with
the ability to query the contexts for the vm state.
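
A hypothetical userspace sketch (assuming the usual ioctl boilerplate,
and that drm_i915_gem_vm_control returns the new handle in its vm_id
field): create one ppGTT and point two contexts at it:

	struct drm_i915_gem_vm_control vm = {};
	struct drm_i915_gem_context_param p = {
		.param = I915_CONTEXT_PARAM_VM,
	};

	/* Allocate a fresh ppGTT for this fd. */
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);

	/* Attach both contexts (on the same fd) to the one address space. */
	p.value = vm.vm_id;
	p.ctx_id = ctx_a;
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	p.ctx_id = ctx_b;
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

	/* The contexts hold their own references, so the handle can go. */
	ioctl(drm_fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &vm);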

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         |  2 ++
 drivers/gpu/drm/i915/i915_gem_context.c |  5 -----
 include/uapi/drm/i915_drm.h             | 15 +++++++++++++++
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c8cb70d4fe91..ea9b4eb3bf9b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3150,6 +3150,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 0d9d435ae398..217d4fe0349d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -98,7 +98,6 @@
 #include "i915_user_extensions.h"
 
 #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
-#define I915_CONTEXT_PARAM_VM 0x9
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -969,8 +968,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
 
-	return -EINVAL; /* nothing to see here; please move along */
-
 	if (!ctx->ppgtt)
 		return -ENODEV;
 
@@ -1069,8 +1066,6 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_hw_ppgtt *ppgtt, *old;
 	int err;
 
-	return -EINVAL; /* nothing to see here; please move along */
-
 	if (args->size)
 		return -EINVAL;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a73f5316766..d6ad4a15b2b9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -415,6 +417,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1507,6 +1511,17 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.20.1


* [PATCH 25/32] drm/i915: Allow a context to define its set of engines
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (22 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 24/32] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  9:50   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 26/32] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
                   ` (10 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines that may be sparse and
even heterogeneous within a class (not all video decoders are created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture we then add the challenge of virtual
engines: one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to a defined set of engines, with the order fully controlled by
the user via (class, instance) tuples. With such precise control in
context setup, we can continue to use the existing execbuf uABI of
specifying a single index; only now it no longer maps automagically onto
the hardware engines, but indexes the user-defined engine map from the
context.

The I915_EXEC_DEFAULT slot is left empty, and is invalid for use by
execbuf. Its use will be revealed in the next patch.
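
As a rough userspace sketch (not part of the patch itself; it assumes
the usual <sys/ioctl.h> and i915_drm.h includes, an open drm fd, a
previously created context ctx_id, and omits all error handling),
binding a context to a two-engine map might look like:

	/* Hypothetical example: execbuf index 0 -> rcs0, index 1 -> vcs0 */
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
		.engines = {
			{ .engine_class = I915_ENGINE_CLASS_RENDER,
			  .engine_instance = 0 },
			{ .engine_class = I915_ENGINE_CLASS_VIDEO,
			  .engine_instance = 0 },
		},
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.value = (uintptr_t)&engines,
		.size = sizeof(engines),
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

Passing .size = 0 through the same setparam reverts the context to the
legacy ring map, and a getparam with .size = 0 reports the size needed
to read back the current map (see set_engines()/get_engines() below).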

v2: Fixup freeing of local on success of get_engines()
v3: Allow empty engines[]
v4: s/nengine/num_engines/
v5: Replace 64 limit on num_engines with a note that execbuf is
currently limited to only using the first 64 engines.
v6: Actually use the engines_mutex to guard the ctx->engines.

Testcase: igt/gem_ctx_engines
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 219 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h       |  18 ++
 drivers/gpu/drm/i915/i915_gem_context_types.h |   1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   5 +-
 drivers/gpu/drm/i915/i915_utils.h             |  36 +++
 include/uapi/drm/i915_drm.h                   |  31 +++
 6 files changed, 303 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 217d4fe0349d..b4b7a2eee1c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,7 +90,6 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
-#include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
@@ -143,13 +142,17 @@ static void lut_close(struct i915_gem_context *ctx)
 static struct intel_context *
 lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
 {
-	struct intel_engine_cs *engine;
+	if (!i915_gem_context_user_engines(ctx)) {
+		struct intel_engine_cs *engine;
 
-	engine = intel_engine_lookup_user(ctx->i915, class, instance);
-	if (!engine)
-		return ERR_PTR(-EINVAL);
+		engine = intel_engine_lookup_user(ctx->i915, class, instance);
+		if (!engine)
+			return ERR_PTR(-EINVAL);
+
+		instance = engine->id;
+	}
 
-	return i915_gem_context_get_engine(ctx, engine->id);
+	return i915_gem_context_get_engine(ctx, instance);
 }
 
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
@@ -257,6 +260,17 @@ static void free_engines(struct i915_gem_engines *e)
 	__free_engines(e, e->num_engines);
 }
 
+static void free_engines_rcu(struct work_struct *wrk)
+{
+	struct i915_gem_engines *e =
+		container_of(wrk, struct i915_gem_engines, rcu.work);
+	struct drm_i915_private *i915 = e->i915;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	free_engines(e);
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
 static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 {
 	struct intel_engine_cs *engine;
@@ -1382,6 +1396,191 @@ static int set_sseu(struct i915_gem_context *ctx,
 	return ret;
 }
 
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *engines;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user =
+		u64_to_user_ptr(args->value);
+	struct set_engines set = { .ctx = ctx };
+	unsigned int num_engines, n;
+	u64 extensions;
+	int err;
+
+	if (!args->size) { /* switch back to legacy user_ring_map */
+		if (!i915_gem_context_user_engines(ctx))
+			return 0;
+
+		set.engines = default_engines(ctx);
+		if (IS_ERR(set.engines))
+			return PTR_ERR(set.engines);
+
+		goto replace;
+	}
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
+	if (args->size < sizeof(*user) ||
+	    !IS_ALIGNED(args->size, sizeof(*user->engines))) {
+		DRM_DEBUG("Invalid size for engine array: %d\n",
+			  args->size);
+		return -EINVAL;
+	}
+
+	/*
+	 * Note that I915_EXEC_RING_MASK limits execbuf to only using the
+	 * first 64 engines defined here.
+	 */
+	num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+
+	set.engines = kmalloc(struct_size(set.engines, engines, num_engines),
+			      GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	set.engines->i915 = ctx->i915;
+	for (n = 0; n < num_engines; n++) {
+		struct i915_engine_class_instance ci;
+		struct intel_engine_cs *engine;
+
+		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
+			__free_engines(set.engines, n);
+			return -EFAULT;
+		}
+
+		if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines->engines[n] = NULL;
+			continue;
+		}
+
+		engine = intel_engine_lookup_user(ctx->i915,
+						  ci.engine_class,
+						  ci.engine_instance);
+		if (!engine) {
+			DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			__free_engines(set.engines, n);
+			return -ENOENT;
+		}
+
+		set.engines->engines[n] = intel_context_create(ctx, engine);
+		if (!set.engines->engines[n]) {
+			__free_engines(set.engines, n);
+			return -ENOMEM;
+		}
+	}
+	set.engines->num_engines = num_engines;
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		free_engines(set.engines);
+		return err;
+	}
+
+replace:
+	mutex_lock(&ctx->engines_mutex);
+	if (args->size)
+		i915_gem_context_set_user_engines(ctx);
+	else
+		i915_gem_context_clear_user_engines(ctx);
+	rcu_swap_protected(ctx->engines, set.engines, 1);
+	mutex_unlock(&ctx->engines_mutex);
+
+	INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
+	queue_rcu_work(system_wq, &set.engines->rcu);
+
+	return 0;
+}
+
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct i915_gem_engines *e;
+	size_t n, count, size;
+	int err = 0;
+
+	err = mutex_lock_interruptible(&ctx->engines_mutex);
+	if (err)
+		return err;
+
+	if (!i915_gem_context_user_engines(ctx)) {
+		args->size = 0;
+		goto unlock;
+	}
+
+	e = i915_gem_context_engines(ctx);
+	count = e->num_engines;
+
+	/* Be paranoid in case we have an impedance mismatch */
+	if (!check_struct_size(user, engines, count, &size)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+	if (overflows_type(size, args->size)) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (!args->size) {
+		args->size = size;
+		goto unlock;
+	}
+
+	if (args->size < size) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	user = u64_to_user_ptr(args->value);
+	if (!access_ok(user, size)) {
+		err = -EFAULT;
+		goto unlock;
+	}
+
+	if (put_user(0, &user->extensions)) {
+		err = -EFAULT;
+		goto unlock;
+	}
+
+	for (n = 0; n < count; n++) {
+		struct i915_engine_class_instance ci = {
+			.engine_class = I915_ENGINE_CLASS_INVALID,
+			.engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
+		};
+
+		if (e->engines[n]) {
+			ci.engine_class = e->engines[n]->engine->uabi_class;
+			ci.engine_instance = e->engines[n]->engine->instance;
+		}
+
+		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
+			err = -EFAULT;
+			goto unlock;
+		}
+	}
+
+	args->size = size;
+
+unlock:
+	mutex_unlock(&ctx->engines_mutex);
+	return err;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1455,6 +1654,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_ppgtt(fpriv, ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1685,6 +1888,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_ppgtt(file_priv, ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 272e183ebc0c..9ad4a6362438 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -112,6 +112,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
+static inline bool
+i915_gem_context_user_engines(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_user_engines(struct i915_gem_context *ctx)
+{
+	set_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
+{
+	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
 int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
 static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index d5cb4f121aad..fb965ded2508 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -146,6 +146,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNED			0
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+#define CONTEXT_USER_ENGINES		3
 
 	/**
 	 * @hw_id: - unique identifier for the context
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 679f7c1561ba..d6c5220addd0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2165,7 +2165,10 @@ eb_select_engine(struct i915_execbuffer *eb,
 	unsigned int idx;
 	int err;
 
-	idx = eb_select_legacy_ring(eb, file, args);
+	if (i915_gem_context_user_engines(eb->gem_context))
+		idx = args->flags & I915_EXEC_RING_MASK;
+	else
+		idx = eb_select_legacy_ring(eb, file, args);
 
 	ce = i915_gem_context_get_engine(eb->gem_context, idx);
 	if (IS_ERR(ce))
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 2dbe8933b50a..1436fe2fb5f8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,6 +25,9 @@
 #ifndef __I915_UTILS_H
 #define __I915_UTILS_H
 
+#include <linux/kernel.h>
+#include <linux/overflow.h>
+
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
 #if 0
@@ -73,6 +76,39 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
 
+static inline bool
+__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
+{
+	size_t sz;
+
+	if (check_mul_overflow(count, arr, &sz))
+		return false;
+
+	if (check_add_overflow(sz, base, &sz))
+		return false;
+
+	*size = sz;
+	return true;
+}
+
+/**
+ * check_struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ * @sz: Total size of structure and array
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements, like struct_size() but reports
+ * whether it overflowed, and the resultant size in @sz
+ *
+ * Return: false if the calculation overflowed.
+ */
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))
+
 #define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(typeof(ptr))(__v & -BIT(n));					\
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d6ad4a15b2b9..8e1bb22926e4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -136,6 +136,7 @@ enum drm_i915_gem_engine_class {
 struct i915_engine_class_instance {
 	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
 	__u16 engine_instance;
+#define I915_ENGINE_CLASS_INVALID_NONE -1
 };
 
 /**
@@ -1522,6 +1523,26 @@ struct drm_i915_gem_context_param {
 	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
 	 */
 #define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1585,6 +1606,16 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+	__u64 extensions; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
-- 
2.20.1


* [PATCH 26/32] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (23 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 25/32] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 27/32] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
                   ` (9 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

The SINGLE_TIMELINE flag can be used to create a context such that all
engine instances within that context share a common timeline. This can
be useful for mixing operations between real and virtual engines, or
when using a composite context for a single client API context.
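
A minimal userspace sketch of the flag (illustrative only, assuming an
open drm fd and skipping error handling):

	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
	/* create.ctx_id: all engines of this context now share a single
	 * timeline, so requests submitted to any of them are ordered
	 * against each other. */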

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 4 ----
 include/uapi/drm/i915_drm.h             | 3 ++-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b4b7a2eee1c9..d6bea51050c0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -96,8 +96,6 @@
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
-#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
-
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
 static struct i915_global_gem_context {
@@ -493,8 +491,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	BUILD_BUG_ON(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &
-		     ~I915_CONTEXT_CREATE_FLAGS_UNKNOWN);
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
 	    !HAS_EXECLISTS(dev_priv))
 		return ERR_PTR(-EINVAL);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8e1bb22926e4..7aef672ab3c7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1469,8 +1469,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
-- 
2.20.1


* [PATCH 27/32] drm/i915: Allow userspace to clone contexts on creation
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (24 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 26/32] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  9:50   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 28/32] drm/i915: Load balancing across a virtual engine Chris Wilson
                   ` (8 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

A use case arose out of handling context recovery in mesa, whereby they
wish to recreate a context with fresh logical state but preserve all
other details of the original. Currently, they create a new context and
iterate over which bits they want to copy across, but it would be much
more convenient if they were able to just pass in a target context to
clone during creation. This essentially extends the setparam during
creation to pull the details from a target context instead of the
user-supplied parameters.

The ideal here is that we don't expose control over anything more than
can be obtained via CONTEXT_PARAM. That is, userspace retains explicit
control over all features, and this API is just a convenience.

For example, you could replace

	struct context_param p = { .param = CONTEXT_PARAM_VM };

	p.ctx_id = old_id;
	gem_context_get_param(&p);

	new_id = gem_context_create();

	p.ctx_id = new_id;
	gem_context_set_param(&p);

	gem_vm_destroy(p.value); /* drop the ref to VM_ID handle */

with

	struct create_ext_param p = {
	  { .name = CONTEXT_CREATE_CLONE },
	  .clone_id = old_id,
	  .flags = CLONE_FLAGS_VM
	};
	new_id = gem_context_create_ext(&p);

and not have to worry about stray namespace pollution etc.
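
In terms of the actual uAPI added here, that second snippet corresponds
to something like (a sketch only; error handling and fd setup omitted):

	struct drm_i915_gem_context_create_ext_clone clone = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_CLONE },
		.clone_id = old_id,
		.flags = I915_CONTEXT_CLONE_VM,
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&clone,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
	new_id = create.ctx_id;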

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 206 ++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h             |  15 ++
 2 files changed, 221 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d6bea51050c0..ba7582d955d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1682,8 +1682,214 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
 }
 
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	bool user_engines;
+	unsigned long n;
+
+	clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
+	if (!clone)
+		goto err_unlock;
+
+	clone->i915 = dst->i915;
+	for (n = 0; n < e->num_engines; n++) {
+		if (!e->engines[n]) {
+			clone->engines[n] = NULL;
+			continue;
+		}
+
+		clone->engines[n] =
+			intel_context_create(dst, e->engines[n]->engine);
+		if (!clone->engines[n]) {
+			__free_engines(clone, n);
+			goto err_unlock;
+		}
+	}
+	clone->num_engines = n;
+
+	user_engines = i915_gem_context_user_engines(src);
+	i915_gem_context_unlock_engines(src);
+
+	free_engines(dst->engines);
+	RCU_INIT_POINTER(dst->engines, clone);
+	if (user_engines)
+		i915_gem_context_set_user_engines(dst);
+	else
+		i915_gem_context_clear_user_engines(dst);
+	return 0;
+
+err_unlock:
+	i915_gem_context_unlock_engines(src);
+	return -ENOMEM;
+}
+
+static int clone_flags(struct i915_gem_context *dst,
+		       struct i915_gem_context *src)
+{
+	dst->user_flags = src->user_flags;
+	return 0;
+}
+
+static int clone_schedattr(struct i915_gem_context *dst,
+			   struct i915_gem_context *src)
+{
+	dst->sched = src->sched;
+	return 0;
+}
+
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	unsigned long n;
+	int err;
+
+	clone = dst->engines; /* no locking required; sole access */
+	if (e->num_engines != clone->num_engines) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	for (n = 0; n < e->num_engines; n++) {
+		struct intel_context *ce = e->engines[n];
+
+		if (clone->engines[n]->engine->class != ce->engine->class) {
+			/* Must have compatible engine maps! */
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		/* serialises with set_sseu */
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			goto unlock;
+
+		clone->engines[n]->sseu = ce->sseu;
+		intel_context_unlock_pinned(ce);
+	}
+
+	err = 0;
+unlock:
+	i915_gem_context_unlock_engines(src);
+	return err;
+}
+
+static int clone_timeline(struct i915_gem_context *dst,
+			  struct i915_gem_context *src)
+{
+	if (src->timeline) {
+		GEM_BUG_ON(src->timeline == dst->timeline);
+
+		if (dst->timeline)
+			i915_timeline_put(dst->timeline);
+		dst->timeline = i915_timeline_get(src->timeline);
+	}
+
+	return 0;
+}
+
+static int clone_vm(struct i915_gem_context *dst,
+		    struct i915_gem_context *src)
+{
+	struct i915_hw_ppgtt *ppgtt;
+
+	rcu_read_lock();
+	do {
+		ppgtt = READ_ONCE(src->ppgtt);
+		if (!ppgtt)
+			break;
+
+		if (!kref_get_unless_zero(&ppgtt->ref))
+			continue;
+
+		/*
+		 * This ppgtt may have been reallocated between
+		 * the read and the kref, and reassigned to a third
+		 * context. In order to avoid inadvertent sharing
+		 * of this ppgtt with that third context (and not
+		 * src), we have to confirm that we have the same
+		 * ppgtt after passing through the strong memory
+		 * barrier implied by a successful
+		 * kref_get_unless_zero().
+		 *
+		 * Once we have acquired the current ppgtt of src,
+		 * we no longer care if it is released from src, as
+		 * it cannot be reallocated elsewhere.
+		 */
+
+		if (ppgtt == READ_ONCE(src->ppgtt))
+			break;
+
+		i915_ppgtt_put(ppgtt);
+	} while (1);
+	rcu_read_unlock();
+
+	if (ppgtt) {
+		__assign_ppgtt(dst, ppgtt);
+		i915_ppgtt_put(ppgtt);
+	}
+
+	return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+	static int (* const fn[])(struct i915_gem_context *dst,
+				  struct i915_gem_context *src) = {
+#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
+		MAP(ENGINES, clone_engines),
+		MAP(FLAGS, clone_flags),
+		MAP(SCHEDATTR, clone_schedattr),
+		MAP(SSEU, clone_sseu),
+		MAP(TIMELINE, clone_timeline),
+		MAP(VM, clone_vm),
+#undef MAP
+	};
+	struct drm_i915_gem_context_create_ext_clone local;
+	const struct create_ext *arg = data;
+	struct i915_gem_context *dst = arg->ctx;
+	struct i915_gem_context *src;
+	int err, bit;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
+		     I915_CONTEXT_CLONE_UNKNOWN);
+
+	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+		return -EINVAL;
+
+	if (local.rsvd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
+	rcu_read_unlock();
+	if (!src)
+		return -ENOENT;
+
+	GEM_BUG_ON(src == dst);
+
+	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
+		if (!(local.flags & BIT(bit)))
+			continue;
+
+		err = fn[bit](dst, src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7aef672ab3c7..7694113362d4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1623,6 +1623,21 @@ struct drm_i915_gem_context_create_ext_setparam {
 	struct drm_i915_gem_context_param param;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+	struct i915_user_extension base;
+	__u32 clone_id;
+	__u32 flags;
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
+#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
+#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
+#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
+#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
+#define I915_CONTEXT_CLONE_VM		(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+	__u64 rsvd;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.20.1


* [PATCH 28/32] drm/i915: Load balancing across a virtual engine
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (25 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 27/32] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17 11:26   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
                   ` (7 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Having allowed the user to define the set of engines that they wish to
use, we go one step further and allow them to bind those engines into a
single virtual instance. Submitting a batch to the virtual engine will
then forward it to any one of the set in a manner best suited to
distributing load. The virtual engine has a single timeline across all
engines (it operates as a single queue), so it cannot concurrently run
batches across multiple engines by itself; it is left up to the user to
submit multiple concurrent batches to multiple queues. Multiple users
will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so, wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.
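
Stripped of locking subtleties, the claim performed in
execlists_dequeue() below amounts to (an illustrative sketch, not the
verbatim driver code):

	struct i915_request *rq;

	spin_lock(&ve->base.timeline.lock);
	rq = ve->request;
	if (rq && virtual_matches(ve, rq, engine)) {
		ve->request = NULL;	/* claimed; other siblings see NULL */
		rq->engine = engine;	/* bind to this physical engine */
		__i915_request_submit(rq);
	}
	spin_unlock(&ve->base.timeline.lock);

The first physical engine to run its submission tasklet and find the
request wins; the losers later notice the NULL request and simply drop
their stale rbtree node.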

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
This is mostly broken up by applying FQ_CODEL rules for prioritising new
clients, and hopefully the virtual and real engines are not then congested
(i.e. all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

- We only drip-feed single requests through each virtual engine and onto
the physical engines, even if there was enough work to fill all ELSP,
leaving small stalls with an idle CS event at the end of every request.
Could we be greedy and fill both slots? Being lazy is virtuous for load
distribution on less-than-full workloads though.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.

sseu: Lift the restriction to allow sseu to be reconfigured on virtual
engines composed of RENDER_CLASS (rcs).

v2: macroize check_user_mbz()
v3: Cancel virtual engines on wedging
v4: Commence commenting
v5: Replace 64b sibling_mask with a list of class:instance

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  |   6 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 611 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_lrc.h          |   9 +
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 180 ++++++
 drivers/gpu/drm/i915/i915_gem.h              |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c      | 118 +++-
 drivers/gpu/drm/i915/i915_scheduler.c        |  18 +-
 drivers/gpu/drm/i915/i915_timeline_types.h   |   1 +
 include/uapi/drm/i915_drm.h                  |  39 ++
 10 files changed, 978 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index e19f84b006cc..f900f0680647 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 				break;
 		}
 		list_add(&rq->signal_link, pos);
-		if (pos == &ce->signals) /* catch transitions from empty list */
+		if (pos == &ce->signals) { /* catch transitions from empty */
 			list_move_tail(&ce->signal_link, &b->signalers);
+		} else if (ce->engine != rq->engine) { /* virtualised */
+			list_move_tail(&ce->signal_link, &b->signalers);
+			intel_engine_queue_breadcrumbs(rq->engine);
+		}
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 		spin_unlock(&b->irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d972c339309c..6dceb78e95d7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -227,6 +227,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_write: control register for Context Switch buffer
@@ -445,6 +446,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
 	unsigned int flags;
 
 	/*
@@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 01f58a152a9e..d6efd6aa67cb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -136,6 +136,7 @@
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_vgpu.h"
+#include "intel_engine_pm.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
@@ -165,6 +166,41 @@
 
 #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+	struct intel_context context;
+
+	/*
+	 * We allow only a single request through the virtual engine at a time
+	 * (each request in the timeline waits for the completion fence of
+	 * the previous before being submitted). By restricting ourselves to
+	 * only submitting a single request, each request is placed onto a
+	 * physical engine to maximise load spreading (by virtue of the late greedy
+	 * scheduling -- each real engine takes the next available request
+	 * upon idling).
+	 */
+	struct i915_request *request;
+
+	/*
+	 * We keep a rbtree of available virtual engines inside each physical
+	 * engine, sorted by priority. Here we preallocate the nodes we need
+	 * for the virtual engine, indexed by physical_engine->id.
+	 */
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	/* And finally, which physical engines this virtual engine maps onto. */
+	unsigned int num_siblings;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
@@ -228,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
-				const struct i915_request *rq)
+				const struct i915_request *rq,
+				struct rb_node *rb)
 {
 	int last_prio;
 
@@ -263,6 +300,22 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	    rq_prio(list_next_entry(rq, link)) > last_prio)
 		return true;
 
+	if (rb) { /* XXX virtual precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		bool preempt = false;
+
+		if (engine == ve->siblings[0]) { /* only preempt one sibling */
+			spin_lock(&ve->base.timeline.lock);
+			if (ve->request)
+				preempt = rq_prio(ve->request) > last_prio;
+			spin_unlock(&ve->base.timeline.lock);
+		}
+
+		if (preempt)
+			return preempt;
+	}
+
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
 	 * it was the set of queued requests? Pick the highest priority in
@@ -381,6 +434,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -389,14 +444,30 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		/*
+		 * Push the request back into the queue for later resubmission.
+		 * If this request is not native to this physical engine (i.e.
+		 * it came from a virtual source), push it back onto the virtual
+		 * engine so that it can be moved across onto another physical
+		 * engine as load dictates.
+		 */
+		owner = rq->hw_context->engine;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		list_add(&rq->sched.link, pl);
+			list_add(&rq->sched.link, pl);
+		} else {
+			if (__i915_request_has_started(rq))
+				rq->sched.attr.priority |= ACTIVE_PRIORITY;
+
+			rq->engine = owner;
+			owner->submit_request(rq);
+		}
 
 		active = rq;
 	}
@@ -658,6 +729,72 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
+static bool virtual_matches(const struct virtual_engine *ve,
+			    const struct i915_request *rq,
+			    const struct intel_engine_cs *engine)
+{
+	const struct intel_engine_cs *active;
+
+	/*
+	 * We track when the HW has completed saving the context image
+	 * (i.e. when we have seen the final CS event switching out of
+	 * the context) and must not overwrite the context image before
+	 * then. This restricts us to only using the active engine
+	 * while the previous virtualized request is inflight (so
+	 * we reuse the register offsets). This is a very small
+	 * hysteresis on the greedy selection algorithm.
+	 */
+	active = READ_ONCE(ve->context.active);
+	if (active && active != engine)
+		return false;
+
+	return true;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -690,6 +827,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (!rq) { /* lazily cleanup after another engine handled rq */
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		if (!virtual_matches(ve, rq, engine)) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -711,7 +868,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last)) {
+		if (need_preempt(engine, last, rb)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -751,6 +908,89 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	while (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		GEM_BUG_ON(rq != ve->request);
+		GEM_BUG_ON(rq->engine != &ve->base);
+		GEM_BUG_ON(rq->hw_context != &ve->context);
+
+		if (rq_prio(rq) >= queue_prio(execlists)) {
+			if (!virtual_matches(ve, rq, engine)) {
+				spin_unlock(&ve->base.timeline.lock);
+				rb = rb_next(rb);
+				continue;
+			}
+
+			if (last && !can_merge_rq(last, rq)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return; /* leave this rq for another engine */
+			}
+
+			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
+				  engine->name,
+				  rq->fence.context,
+				  rq->fence.seqno,
+				  i915_request_completed(rq) ? "!" :
+				  i915_request_started(rq) ? "*" :
+				  "",
+				  yesno(engine != ve->siblings[0]));
+
+			ve->request = NULL;
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			rq->engine = engine;
+
+			if (engine != ve->siblings[0]) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->num_siblings; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+
+				GEM_BUG_ON(ve->siblings[0] != engine);
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+		break;
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -2043,6 +2283,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		i915_priolist_free(p);
 	}
 
+	/* Cancel all attached virtual engines */
+	while ((rb = rb_first_cached(&execlists->virtual))) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+		rb_erase_cached(rb, &execlists->virtual);
+		RB_CLEAR_NODE(rb);
+
+		spin_lock(&ve->base.timeline.lock);
+		if (ve->request) {
+			ve->request->engine = engine;
+			__i915_request_submit(ve->request);
+			dma_fence_set_error(&ve->request->fence, -EIO);
+			i915_request_mark_complete(ve->request);
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			ve->request = NULL;
+		}
+		spin_unlock(&ve->base.timeline.lock);
+	}
+
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority_hint = INT_MIN;
@@ -2779,6 +3039,316 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	return ret;
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		__execlists_context_fini(&ve->context);
+
+	i915_timeline_fini(&ve->base.timeline);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of sibling when submitting requests, sibling[0] is
+	 * the array of siblings when submitting requests, sibling[0] is
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->num_siblings);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	virtual_update_register_offsets(ve->context.lrc_reg_state,
+					ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = __execlists_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_get(ve->siblings[n]);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = execlists_context_unpin,
+
+	.enter = virtual_context_enter,
+	.exit = virtual_context_exit,
+
+	.destroy = virtual_context_destroy,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	const int prio = ve->base.execlists.queue_priority_hint;
+	unsigned int n;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			/*
+			 * Cheat and avoid rebalancing the tree if we can
+			 * reuse this node in situ.
+			 */
+			first = rb_first_cached(&sibling->execlists.virtual) ==
+				&node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority_hint) {
+			sibling->execlists.queue_priority_hint = prio;
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+		}
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+	GEM_TRACE("%s: rq=%llx:%lld\n",
+		  ve->base.name,
+		  rq->fence.context,
+		  rq->fence.seqno);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority_hint = rq_prio(rq);
+	WRITE_ONCE(ve->request, rq);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (count == 0)
+		return ERR_PTR(-EINVAL);
+
+	if (count == 1)
+		return intel_context_create(ctx, siblings[0]);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
+	if (err)
+		goto err_put;
+	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+	ve->base.cops = &virtual_context_ops;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask) {
+			DRM_DEBUG("duplicate %s entry in load balancer\n",
+				  sibling->name);
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+		 * the execlists backend -- we push out requests directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handle cloning of the requests and
+		 * submitting a copy into each backend.
+		 */
+		if (sibling->execlists.tasklet.func !=
+		    execlists_submission_tasklet) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->num_siblings++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+					  sibling->class, ve->base.class);
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		ve->base.uabi_class = sibling->uabi_class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+	}
+
+	return &ve->context;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src)
+{
+	struct virtual_engine *se = to_virtual_engine(src);
+	struct intel_context *dst;
+
+	dst = intel_execlists_create_virtual(ctx,
+					     se->siblings,
+					     se->num_siblings);
+	if (IS_ERR(dst))
+		return dst;
+
+	return dst;
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2836,6 +3406,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index a0dc907a7249..5530606052e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 84538f69185b..f34aa9e042a3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1301,6 +1301,185 @@ static int live_preempt_smoke(void *arg)
 	return err;
 }
 
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_context *ve[16];
+	unsigned long n, prime, nc;
+	struct igt_live_test t;
+	ktime_t times[2] = {};
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n]) {
+			err = -ENOMEM;
+			nctx = n;
+			goto out;
+		}
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n])) {
+			kernel_context_close(ctx[n]);
+			err = PTR_ERR(ve[n]);
+			nctx = n;
+			goto out;
+		}
+
+		err = intel_context_pin(ve[n]);
+		if (err) {
+			intel_context_put(ve[n]);
+			kernel_context_close(ctx[n]);
+			nctx = n;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_create(ve[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_create(ve[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc],
+					      I915_WAIT_LOCKED,
+					      HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%lld timed out\n",
+				       __func__, ve[0]->engine->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+					  __func__, ve[0]->engine->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_context_unpin(ve[nc]);
+		intel_context_put(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1313,6 +1492,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 67f8a4a807a0..fe82d3571072 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
 	return atomic_dec_and_test(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ba7582d955d1..57b09f624bb4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,6 +86,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/nospec.h>
 
 #include <drm/i915_drm.h>
 
@@ -1209,7 +1210,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
 	int ret;
 
 	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
-	GEM_BUG_ON(ce->engine->id != RCS0);
 
 	ret = intel_context_lock_pinned(ce);
 	if (ret)
@@ -1397,7 +1397,102 @@ struct set_engines {
 	struct i915_gem_engines *engines;
 };
 
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *stack[16];
+	struct intel_engine_cs **siblings;
+	struct intel_context *ce;
+	u16 num_siblings, idx;
+	unsigned int n;
+	int err;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV; /* not implemented yet */
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid placement value, %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
+		return -EEXIST;
+	}
+
+	if (get_user(num_siblings, &ext->num_siblings))
+		return -EFAULT;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	siblings = stack;
+	if (num_siblings > ARRAY_SIZE(stack)) {
+		siblings = kmalloc_array(num_siblings,
+					 sizeof(*siblings),
+					 GFP_KERNEL);
+		if (!siblings)
+			return -ENOMEM;
+	}
+
+	for (n = 0; n < num_siblings; n++) {
+		struct i915_engine_class_instance ci;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+			err = -EFAULT;
+			goto out_siblings;
+		}
+
+		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
+						       ci.engine_class,
+						       ci.engine_instance);
+		if (!siblings[n]) {
+			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			err = -EINVAL;
+			goto out_siblings;
+		}
+	}
+
+	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_siblings;
+	}
+
+	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
+		intel_context_put(ce);
+		err = -EEXIST;
+		goto out_siblings;
+	}
+
+out_siblings:
+	if (siblings != stack)
+		kfree(siblings);
+
+	return err;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int
@@ -1696,14 +1791,29 @@ static int clone_engines(struct i915_gem_context *dst,
 
 	clone->i915 = dst->i915;
 	for (n = 0; n < e->num_engines; n++) {
+		struct intel_engine_cs *engine;
+
 		if (!e->engines[n]) {
 			clone->engines[n] = NULL;
 			continue;
 		}
+		engine = e->engines[n]->engine;
 
-		clone->engines[n] =
-			intel_context_create(dst, e->engines[n]->engine);
-		if (!clone->engines[n]) {
+		/*
+		 * Virtual engines are singletons; they can only exist
+		 * inside a single context, because they embed their
+		 * HW context... As each virtual context implies a single
+		 * timeline (each engine can only dequeue a single request
+		 * at any time), it would be surprising for two contexts
+		 * to use the same engine. So let's create a copy of
+		 * the virtual engine instead.
+		 */
+		if (intel_engine_is_virtual(engine))
+			clone->engines[n] =
+				intel_execlists_clone_virtual(dst, engine);
+		else
+			clone->engines[n] = intel_context_create(dst, engine);
+		if (IS_ERR_OR_NULL(clone->engines[n])) {
 			__free_engines(clone, n);
 			goto err_unlock;
 		}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..b58d9c23a876 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -248,17 +248,26 @@ sched_lock_engine(const struct i915_sched_node *node,
 		  struct intel_engine_cs *locked,
 		  struct sched_cache *cache)
 {
-	struct intel_engine_cs *engine = node_to_request(node)->engine;
+	const struct i915_request *rq = node_to_request(node);
+	struct intel_engine_cs *engine;
 
 	GEM_BUG_ON(!locked);
 
-	if (engine != locked) {
+	/*
+	 * Virtual engines complicate acquiring the engine timeline lock,
+	 * as their rq->engine pointer is not stable until under that
+	 * engine lock. The simple ploy we use is to take the lock then
+	 * check that the rq still belongs to the newly locked engine.
+	 */
+	while (locked != (engine = READ_ONCE(rq->engine))) {
 		spin_unlock(&locked->timeline.lock);
 		memset(cache, 0, sizeof(*cache));
 		spin_lock(&engine->timeline.lock);
+		locked = engine;
 	}
 
-	return engine;
+	GEM_BUG_ON(locked != engine);
+	return locked;
 }
 
 static bool inflight(const struct i915_request *rq,
@@ -371,8 +380,11 @@ static void __i915_schedule(struct i915_request *rq,
 		if (prio <= node->attr.priority || node_signaled(node))
 			continue;
 
+		GEM_BUG_ON(node_to_request(node)->engine != engine);
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 5256a0b5c5f7..1688705f4a2b 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -26,6 +26,7 @@ struct i915_timeline {
 	spinlock_t lock;
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7694113362d4..ff2ababc0984 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -137,6 +137,7 @@ struct i915_engine_class_instance {
 	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
 	__u16 engine_instance;
 #define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
 };
 
 /**
@@ -1607,8 +1608,46 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 num_siblings;
+	__u32 flags; /* all undefined flags must be zero */
+
+	__u64 mbz64[1]; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+	struct i915_user_extension base; \
+	__u16 engine_index; \
+	__u16 num_siblings; \
+	__u32 flags; \
+	__u64 mbz64[1]; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
 	struct i915_engine_class_instance engines[0];
 } __attribute__((packed));
 
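For reference, a userspace sketch of asking for a balanced engine in
slot 0 using the extension above (the pair of vcs engines and the
omitted I915_CONTEXT_PARAM_ENGINES plumbing are illustrative only):

	I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
		.engine_index = 0,	/* fills ctx->engines[0] */
		.num_siblings = 2,
		.engines = {
			{ I915_ENGINE_CLASS_VIDEO, 0 },	/* vcs0 */
			{ I915_ENGINE_CLASS_VIDEO, 1 },	/* vcs1 */
		},
	};

This block is then chained into the extensions list passed to
I915_CONTEXT_PARAM_ENGINES via a context setparam.
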
-- 
2.20.1


* [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (26 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 28/32] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17 11:43   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 30/32] drm/i915: Extend execution fence to support a callback Chris Wilson
                   ` (6 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Allow the user to direct which physical engines of the virtual engine
they wish to execute on, as sometimes it is necessary to override the
load balancing algorithm.

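As a sketch of the mechanism (mirroring the selftest below, not a
recommended API), a request submitted to a virtual engine can be pinned
to a single physical sibling by narrowing its mask before submission:

	struct i915_request *rq;

	rq = i915_request_create(ve);	/* ve: a pinned virtual context */
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Only allow the balancer to pick this one physical engine. */
	rq->execution_mask = sibling->mask;
	i915_request_add(rq);
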
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c    |   1 +
 drivers/gpu/drm/i915/i915_request.h    |   3 +
 4 files changed, 193 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d6efd6aa67cb..560a18bb4cbb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
+
+	/*
+	 * If this is part of a virtual engine, its next request may have
+	 * been blocked waiting for access to the active context. We have
+	 * to kick all the siblings again in case we need to switch (e.g.
+	 * the next request is not runnable on this engine). Hopefully,
+	 * we will already have submitted the next request before the
+	 * tasklet runs and do not need to rebuild each virtual tree
+	 * and kick everyone again.
+	 */
+	if (rq->engine != rq->hw_context->engine)
+		tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
 }
 
 static u64 execlists_update_context(struct i915_request *rq)
@@ -779,6 +791,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
 {
 	const struct intel_engine_cs *active;
 
+	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
+		return false;
+
 	/*
 	 * We track when the HW has completed saving the context image
 	 * (i.e. when we have seen the final CS event switching out of
@@ -3139,12 +3154,44 @@ static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
+static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+{
+	struct i915_request *rq;
+	intel_engine_mask_t mask;
+
+	rq = READ_ONCE(ve->request);
+	if (!rq)
+		return 0;
+
+	/* The rq is ready for submission; rq->execution_mask is now stable. */
+	mask = rq->execution_mask;
+	if (unlikely(!mask)) {
+		/* Invalid selection, submit to a random engine in error */
+		i915_request_skip(rq, -ENODEV);
+		mask = ve->siblings[0]->mask;
+	}
+
+	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
+		  ve->base.name,
+		  rq->fence.context, rq->fence.seqno,
+		  mask, ve->base.execlists.queue_priority_hint);
+
+	return mask;
+}
+
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
 	const int prio = ve->base.execlists.queue_priority_hint;
+	intel_engine_mask_t mask;
 	unsigned int n;
 
+	rcu_read_lock();
+	mask = virtual_submission_mask(ve);
+	rcu_read_unlock();
+	if (unlikely(!mask))
+		return;
+
 	local_irq_disable();
 	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
@@ -3152,6 +3199,17 @@ static void virtual_submission_tasklet(unsigned long data)
 		struct rb_node **parent, *rb;
 		bool first;
 
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->timeline.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->timeline.lock);
+			}
+			continue;
+		}
+
 		spin_lock(&sibling->timeline.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f34aa9e042a3..209e51ef13e6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1480,6 +1480,136 @@ static int live_virtual_engine(void *arg)
 	return err;
 }
 
+static int mask_virtual_engine(struct drm_i915_private *i915,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling)
+{
+	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
+	struct i915_gem_context *ctx;
+	struct intel_context *ve;
+	struct igt_live_test t;
+	unsigned int n;
+	int err;
+
+	/*
+	 * Check that by setting the execution mask on a request, we can
+	 * restrict it to our desired engine within the virtual engine.
+	 */
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_close;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < nsibling; n++) {
+		request[n] = i915_request_create(ve);
+		if (IS_ERR(request[n])) {
+			err = PTR_ERR(request[n]);
+			nsibling = n;
+			goto out;
+		}
+
+		/* Reverse order as it's more likely to be unnatural */
+		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
+
+		i915_request_get(request[n]);
+		i915_request_add(request[n]);
+	}
+
+	for (n = 0; n < nsibling; n++) {
+		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
+			pr_err("%s(%s): wait for %llx:%lld timed out\n",
+			       __func__, ve->engine->name,
+			       request[n]->fence.context,
+			       request[n]->fence.seqno);
+
+			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+				  __func__, ve->engine->name,
+				  request[n]->fence.context,
+				  request[n]->fence.seqno);
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out;
+		}
+
+		if (request[n]->engine != siblings[nsibling - n - 1]) {
+			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
+			       request[n]->engine->name,
+			       siblings[nsibling - n - 1]->name);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (n = 0; n < nsibling; n++)
+		i915_request_put(request[n]);
+
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_close:
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_mask(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		unsigned int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = mask_virtual_engine(i915, siblings, nsibling);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1493,6 +1623,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_mask),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 46f4fc2a8840..78c07e131521 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -688,6 +688,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ALL_ENGINES;
 
 	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8025a89b5999..d7f9b2194568 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,8 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
@@ -156,6 +158,7 @@ struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	intel_engine_mask_t execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in
-- 
2.20.1


* [PATCH 30/32] drm/i915: Extend execution fence to support a callback
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (27 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  7:56 ` [PATCH 31/32] drm/i915/execlists: Virtual engine bonding Chris Wilson
                   ` (5 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to configure the slave request
depending on which physical engine the master request is executed on.
For this, we introduce a callback from the execute fence to convey this
information.

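As a sketch of the intended use (the hook below is hypothetical; the
real consumer, bond_execute, arrives in the next patch):

	static void slave_configure(struct i915_request *rq,
				    struct dma_fence *signal)
	{
		/* Called once the signaler is submitted to real HW. */
		pr_debug("master executing on %s\n",
			 to_request(signal)->engine->name);
	}

	err = i915_request_await_execution(rq, &master->fence,
					   slave_configure);
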
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 84 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_request.h |  4 ++
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 78c07e131521..7c37244539b4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -38,6 +38,8 @@ struct execute_cb {
 	struct list_head link;
 	struct irq_work work;
 	struct i915_sw_fence *fence;
+	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+	struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -329,6 +331,17 @@ static void irq_execute_cb(struct irq_work *wrk)
 	kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	cb->hook(container_of(cb->fence, struct i915_request, submit),
+		 &cb->signal->fence);
+	i915_request_put(cb->signal);
+
+	irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
 	struct execute_cb *cb;
@@ -355,14 +368,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-			     struct i915_request *signal,
-			     gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+			       struct i915_request *signal,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal),
+			       gfp_t gfp)
 {
 	struct execute_cb *cb;
 
-	if (i915_request_is_active(signal))
+	if (i915_request_is_active(signal)) {
+		if (hook)
+			hook(rq, &signal->fence);
 		return 0;
+	}
 
 	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
 	if (!cb)
@@ -372,8 +390,18 @@ i915_request_await_execution(struct i915_request *rq,
 	i915_sw_fence_await(cb->fence);
 	init_irq_work(&cb->work, irq_execute_cb);
 
+	if (hook) {
+		cb->hook = hook;
+		cb->signal = i915_request_get(signal);
+		cb->work.func = irq_execute_cb_hook;
+	}
+
 	spin_lock_irq(&signal->lock);
 	if (i915_request_is_active(signal)) {
+		if (hook) {
+			hook(rq, &signal->fence);
+			i915_request_put(signal);
+		}
 		i915_sw_fence_complete(cb->fence);
 		kmem_cache_free(global.slab_execute_cbs, cb);
 	} else {
@@ -802,7 +830,7 @@ emit_semaphore_wait(struct i915_request *to,
 		return err;
 
 	/* Only submit our spinner after the signaler is running! */
-	err = i915_request_await_execution(to, from, gfp);
+	err = __i915_request_await_execution(to, from, NULL, gfp);
 	if (err)
 		return err;
 
@@ -923,6 +951,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
+{
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
+	int ret;
+
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		/* XXX Error for signal-on-any fence arrays */
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
+
+		if (dma_fence_is_i915(fence))
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook,
+							     I915_FENCE_GFP);
+		else
+			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+							    I915_FENCE_TIMEOUT,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	} while (--nchild);
+
+	return 0;
+}
+
 /**
  * i915_request_await_object - set this request to (async) wait upon a bo
  * @to: request we are wishing to use
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d7f9b2194568..c9f7d07991c8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -283,6 +283,10 @@ int i915_request_await_object(struct i915_request *to,
 			      bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
 				 struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+				 struct dma_fence *fence,
+				 void (*hook)(struct i915_request *rq,
+					      struct dma_fence *signal));
 
 void i915_request_add(struct i915_request *rq);
 
-- 
2.20.1


* [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (28 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 30/32] drm/i915: Extend execution fence to support a callback Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-18  6:47   ` Tvrtko Ursulin
  2019-04-17  7:56 ` [PATCH 32/32] drm/i915: Allow specification of parallel execbuf Chris Wilson
                   ` (4 subsequent siblings)
  34 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.

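As a userspace sketch of the new extension (the field layout mirrors
i915_context_engines_bond in the uapi hunk below; the rcs master and
vcs sibling are illustrative, and the open-coded struct stands in for
a uapi helper macro that is not provided here):

	struct {
		struct i915_user_extension base;
		__u16 virtual_index;
		__u16 num_bonds;
		__u16 master_class;
		__u16 master_instance;
		__u64 flags;
		__u64 mbz64[4];
		struct i915_engine_class_instance engines[1];
	} __attribute__((packed)) bond = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_BOND },
		.virtual_index = 0,	/* slot of the virtual engine */
		.num_bonds = 1,
		.master_class = I915_ENGINE_CLASS_RENDER,
		.master_instance = 0,
		.engines = { { I915_ENGINE_CLASS_VIDEO, 0 } },
	};

With this installed, a submit-fence from rcs0 restricts the virtual
engine to executing the bonded request on vcs0.
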
For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would like to then also remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

v2: Emancipate the bonds
v3: Couple in delayed scheduling for the selftests
v4: Handle invalid mutually exclusive bonding
v5: Mention what the uapi does
v6: s/nbond/num_bonds/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  98 +++++++++
 drivers/gpu/drm/i915/gt/intel_lrc.h           |   4 +
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 191 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.c       |  86 ++++++++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
 include/uapi/drm/i915_drm.h                   |  35 ++++
 7 files changed, 424 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 6dceb78e95d7..18b9175835c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -405,6 +405,13 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
+	/*
+	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
+	 * request down to the bonded pairs.
+	 */
+	void            (*bond_execute)(struct i915_request *rq,
+					struct dma_fence *signal);
+
 	/*
 	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 560a18bb4cbb..1b5b0937be25 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -191,6 +191,18 @@ struct virtual_engine {
 		int prio;
 	} nodes[I915_NUM_ENGINES];
 
+	/*
+	 * Keep track of bonded pairs -- restrictions upon our selection
+	 * of physical engines any particular request may be submitted to.
+	 * If we receive a submit-fence from a master engine, we will only
+	 * use one of sibling_mask physical engines.
+	 */
+	struct ve_bond {
+		const struct intel_engine_cs *master;
+		intel_engine_mask_t sibling_mask;
+	} *bonds;
+	unsigned int num_bonds;
+
 	/* And finally, which physical engines this virtual engine maps onto. */
 	unsigned int num_siblings;
 	struct intel_engine_cs *siblings[0];
@@ -969,6 +981,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			rb_erase_cached(rb, &execlists->virtual);
 			RB_CLEAR_NODE(rb);
 
+			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
 			rq->engine = engine;
 
 			if (engine != ve->siblings[0]) {
@@ -3082,6 +3095,8 @@ static void virtual_context_destroy(struct kref *kref)
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
 
+	kfree(ve->bonds);
+
 	i915_timeline_fini(&ve->base.timeline);
 	kfree(ve);
 }
@@ -3277,6 +3292,38 @@ static void virtual_submit_request(struct i915_request *rq)
 	tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static struct ve_bond *
+virtual_find_bond(struct virtual_engine *ve,
+		  const struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->num_bonds; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+static void
+virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+	struct ve_bond *bond;
+
+	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	if (bond) {
+		intel_engine_mask_t old, new, cmp;
+
+		cmp = READ_ONCE(rq->execution_mask);
+		do {
+			old = cmp;
+			new = cmp & bond->sibling_mask;
+		} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
+	}
+}
+
 struct intel_context *
 intel_execlists_create_virtual(struct i915_gem_context *ctx,
 			       struct intel_engine_cs **siblings,
@@ -3315,6 +3362,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
+	ve->base.bond_execute = virtual_bond_execute;
 
 	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
@@ -3404,9 +3452,59 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 	if (IS_ERR(dst))
 		return dst;
 
+	if (se->num_bonds) {
+		struct virtual_engine *de = to_virtual_engine(dst->engine);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->num_bonds,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_context_put(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->num_bonds = se->num_bonds;
+	}
+
 	return dst;
 }
 
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+	int n;
+
+	/* Sanity check the sibling is part of the virtual engine */
+	for (n = 0; n < ve->num_siblings; n++)
+		if (sibling == ve->siblings[n])
+			break;
+	if (n == ve->num_siblings)
+		return -EINVAL;
+
+	bond = virtual_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= sibling->mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->num_bonds + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->num_bonds].master = master;
+	bond[ve->num_bonds].sibling_mask = sibling->mask;
+
+	ve->bonds = bond;
+	ve->num_bonds++;
+
+	return 0;
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 5530606052e5..e029aee87adf 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -123,4 +123,8 @@ struct intel_context *
 intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 			      struct intel_engine_cs *src);
 
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 209e51ef13e6..3f456a8b727b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -13,6 +13,7 @@
 #include "selftests/igt_gem_utils.h"
 #include "selftests/igt_live_test.h"
 #include "selftests/igt_spinner.h"
+#include "selftests/lib_sw_fence.h"
 #include "selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
@@ -1610,6 +1611,195 @@ static int live_virtual_mask(void *arg)
 	return err;
 }
 
+static int bond_virtual_engine(struct drm_i915_private *i915,
+			       unsigned int class,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling,
+			       unsigned int flags)
+#define BOND_SCHEDULE BIT(0)
+{
+	struct intel_engine_cs *master;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq[16];
+	enum intel_engine_id id;
+	unsigned long n;
+	int err;
+
+	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	err = 0;
+	rq[0] = ERR_PTR(-ENOMEM);
+	for_each_engine(master, i915, id) {
+		struct i915_sw_fence fence = {};
+
+		if (master->class == class)
+			continue;
+
+		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
+
+		rq[0] = igt_request_alloc(ctx, master);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto out;
+		}
+		i915_request_get(rq[0]);
+
+		if (flags & BOND_SCHEDULE) {
+			onstack_fence_init(&fence);
+			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
+							       &fence,
+							       GFP_KERNEL);
+		}
+		i915_request_add(rq[0]);
+		if (err < 0)
+			goto out;
+
+		for (n = 0; n < nsibling; n++) {
+			struct intel_context *ve;
+
+			ve = intel_execlists_create_virtual(ctx,
+							    siblings,
+							    nsibling);
+			if (IS_ERR(ve)) {
+				err = PTR_ERR(ve);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_virtual_engine_attach_bond(ve->engine,
+							       master,
+							       siblings[n]);
+			if (err) {
+				intel_context_put(ve);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_context_pin(ve);
+			intel_context_put(ve);
+			if (err) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			rq[n + 1] = i915_request_create(ve);
+			intel_context_unpin(ve);
+			if (IS_ERR(rq[n + 1])) {
+				err = PTR_ERR(rq[n + 1]);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+			i915_request_get(rq[n + 1]);
+
+			err = i915_request_await_execution(rq[n + 1],
+							   &rq[0]->fence,
+							   ve->engine->bond_execute);
+			i915_request_add(rq[n + 1]);
+			if (err < 0) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+		}
+		onstack_fence_fini(&fence);
+
+		if (i915_request_wait(rq[0],
+				      I915_WAIT_LOCKED,
+				      HZ / 10) < 0) {
+			pr_err("Master request did not execute (on %s)!\n",
+			       rq[0]->engine->name);
+			err = -EIO;
+			goto out;
+		}
+
+		for (n = 0; n < nsibling; n++) {
+			if (i915_request_wait(rq[n + 1],
+					      I915_WAIT_LOCKED,
+					      MAX_SCHEDULE_TIMEOUT) < 0) {
+				err = -EIO;
+				goto out;
+			}
+
+			if (rq[n + 1]->engine != siblings[n]) {
+				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
+				       siblings[n]->name,
+				       rq[n + 1]->engine->name,
+				       rq[0]->engine->name);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		for (n = 0; !IS_ERR(rq[n]); n++)
+			i915_request_put(rq[n]);
+		rq[0] = ERR_PTR(-ENOMEM);
+	}
+
+out:
+	for (n = 0; !IS_ERR(rq[n]); n++)
+		i915_request_put(rq[n]);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_bond(void *arg)
+{
+	static const struct phase {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "", 0 },
+		{ "schedule", BOND_SCHEDULE },
+		{ },
+	};
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		const struct phase *p;
+		int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (p = phases; p->name; p++) {
+			err = bond_virtual_engine(i915,
+						  class, siblings, nsibling,
+						  p->flags);
+			if (err) {
+				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
+				       __func__, p->name, class, nsibling, err);
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1624,6 +1814,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
 		SUBTEST(live_virtual_mask),
+		SUBTEST(live_virtual_bond),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 57b09f624bb4..7418a2742f0f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1491,8 +1491,94 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 	return err;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_bond __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *virtual;
+	struct intel_engine_cs *master;
+	u16 class, instance;
+	u16 idx, num_bonds;
+	int err, n;
+
+	if (get_user(idx, &ext->virtual_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (!set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid engine at %d\n", idx);
+		return -EINVAL;
+	}
+
+	/*
+	 * A non-virtual engine has 0 siblings to choose between; and submit
+	 * fence will always be directed to the one engine.
+	 */
+	virtual = set->engines->engines[idx]->engine;
+	if (!intel_engine_is_virtual(virtual))
+		return 0;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	if (get_user(class, &ext->master_class))
+		return -EFAULT;
+
+	if (get_user(instance, &ext->master_instance))
+		return -EFAULT;
+
+	master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+	if (!master) {
+		DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
+			  class, instance);
+		return -EINVAL;
+	}
+
+	if (get_user(num_bonds, &ext->num_bonds))
+		return -EFAULT;
+
+	for (n = 0; n < num_bonds; n++) {
+		struct intel_engine_cs *bond;
+		struct i915_engine_class_instance ci;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
+			return -EFAULT;
+
+		bond = intel_engine_lookup_user(set->ctx->i915,
+						ci.engine_class,
+						ci.engine_instance);
+		if (!bond) {
+			DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			return -EINVAL;
+		}
+
+		err = intel_virtual_engine_attach_bond(virtual, master, bond);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index 2bfa72c1654b..b976c12817c5 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
 
 void onstack_fence_fini(struct i915_sw_fence *fence)
 {
+	if (!fence->flags)
+		return;
+
 	i915_sw_fence_commit(fence);
 	i915_sw_fence_fini(fence);
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ff2ababc0984..091872d24588 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1543,6 +1543,10 @@ struct drm_i915_gem_context_param {
  * sized argument, will revert back to default settings.
  *
  * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
@@ -1645,9 +1649,40 @@ struct i915_context_engines_load_balance {
 	struct i915_engine_class_instance engines[N__]; \
 } __attribute__((packed)) name__
 
+/*
+ * i915_context_engines_bond:
+ *
+ * Construct bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs, for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with a I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+	struct i915_user_extension base;
+
+	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+	__u16 num_bonds;
+
+	__u16 master_class;
+	__u16 master_instance;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 mbz64[4]; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
+#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
 	struct i915_engine_class_instance engines[0];
 } __attribute__((packed));
 
-- 
2.20.1


* [PATCH 32/32] drm/i915: Allow specification of parallel execbuf
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (29 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 31/32] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-04-17  7:56 ` Chris Wilson
  2019-04-17  8:46 ` [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (3 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  7:56 UTC (permalink / raw)
  To: intel-gfx

There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate secondary execbuf to only become ready simultaneously with
the first, so that with all things idle the second execbufs are executed
in parallel with the first. The key difference here between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement).

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherit all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.

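As a userspace sketch (batch setup elided; this assumes the _WR
variant of the execbuf ioctl so that the out-fence fd is returned in
the upper half of rsvd2):

	execbuf1.flags |= I915_EXEC_FENCE_OUT;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf1);

	/* The submit-fence fd is read from the lower half of rsvd2. */
	execbuf2.flags |= I915_EXEC_FENCE_SUBMIT;
	execbuf2.rsvd2 = execbuf1.rsvd2 >> 32;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf2);
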
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Testcase: igt/gem_exec_fence/parallel
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
 include/uapi/drm/i915_drm.h                | 17 ++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ea9b4eb3bf9b..6e3061e7d2fd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -435,6 +435,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_CAPTURE:
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d6c5220addd0..7ce25b54c57b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2318,6 +2318,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
+	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
 	int out_fence_fd = -1;
 	int err;
@@ -2360,11 +2361,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+		if (in_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+
+		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!exec_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+	}
+
 	if (args->flags & I915_EXEC_FENCE_OUT) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
 			err = out_fence_fd;
-			goto err_in_fence;
+			goto err_exec_fence;
 		}
 	}
 
@@ -2494,6 +2508,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
 	if (fences) {
 		err = await_fence_array(&eb, fences);
 		if (err)
@@ -2555,6 +2576,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
 	return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 091872d24588..86e41acf085e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -604,6 +604,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1126,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represents
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.20.1


* Re: [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (30 preceding siblings ...)
  2019-04-17  7:56 ` [PATCH 32/32] drm/i915: Allow specification of parallel execbuf Chris Wilson
@ 2019-04-17  8:46 ` Chris Wilson
  2019-04-17 11:33 ` ✗ Fi.CI.BAT: failure for series starting with [01/32] " Patchwork
                   ` (2 subsequent siblings)
  34 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17  8:46 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-04-17 08:56:26)
> @@ -294,8 +304,8 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>  {
>         struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>  
> -       if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> -               return;
> +       lockdep_assert_held(&rq->lock);
> +       lockdep_assert_irqs_disabled();
>  
>         spin_lock(&b->irq_lock);
>         if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {

@@ -307,6 +307,12 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
        lockdep_assert_held(&rq->lock);
        lockdep_assert_irqs_disabled();

+       /*
+        * We must wait for b->irq_lock so that we know the interrupt handler
+        * has released its reference to the intel_context and has completed
+        * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
+        * required).
+        */
        spin_lock(&b->irq_lock);
        if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
                struct intel_context *ce = rq->hw_context;

Just a touch of explanation,
-Chris

* Re: [PATCH 04/32] drm/i915: Make workaround verification *optional*
  2019-04-17  7:56 ` [PATCH 04/32] drm/i915: Make workaround verification *optional* Chris Wilson
@ 2019-04-17  9:37   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Sometimes the HW doesn't even play fair, and completely forgets about
> register writes. Skip verifying known troublemakers.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=108954
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_workarounds.c      | 40 ++++++++++++++-----
>   .../gpu/drm/i915/intel_workarounds_types.h    |  7 ++--
>   2 files changed, 33 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index 89e2c603e34b..b3cbed1ee1c9 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
>   			wal->wa_count++;
>   			wa_->val |= wa->val;
>   			wa_->mask |= wa->mask;
> +			wa_->read |= wa->read;
>   			return;
>   		}
>   	}
> @@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
>   		   u32 val)
>   {
>   	struct i915_wa wa = {
> -		.reg = reg,
> +		.reg  = reg,
>   		.mask = mask,
> -		.val = val
> +		.val  = val,
> +		.read = mask,
>   	};
>   
>   	_wa_add(wal, &wa);
> @@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
>   	wa_write_masked_or(wal, reg, val, val);
>   }
>   
> +static void
> +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
> +{
> +	struct i915_wa wa = {
> +		.reg  = reg,
> +		.mask = mask,
> +		.val  = val,
> +		/* Bonkers HW, skip verifying */
> +	};
> +
> +	_wa_add(wal, &wa);
> +}
> +
>   #define WA_SET_BIT_MASKED(addr, mask) \
>   	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
>   
> @@ -916,10 +931,11 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
>   static bool
>   wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
>   {
> -	if ((cur ^ wa->val) & wa->mask) {
> +	if ((cur ^ wa->val) & wa->read) {
>   		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
> -			  name, from, i915_mmio_reg_offset(wa->reg), cur,
> -			  cur & wa->mask, wa->val, wa->mask);
> +			  name, from, i915_mmio_reg_offset(wa->reg),
> +			  cur, cur & wa->read,
> +			  wa->val, wa->mask);
>   
>   		return false;
>   	}
> @@ -1122,9 +1138,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>   			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
>   
>   		/* WaPipelineFlushCoherentLines:icl */
> -		wa_write_or(wal,
> -			    GEN8_L3SQCREG4,
> -			    GEN8_LQSC_FLUSH_COHERENT_LINES);
> +		ignore_wa_write_or(wal,
> +				   GEN8_L3SQCREG4,
> +				   GEN8_LQSC_FLUSH_COHERENT_LINES,
> +				   GEN8_LQSC_FLUSH_COHERENT_LINES);
>   
>   		/*
>   		 * Wa_1405543622:icl
> @@ -1151,9 +1168,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>   		 * Wa_1405733216:icl
>   		 * Formerly known as WaDisableCleanEvicts
>   		 */
> -		wa_write_or(wal,
> -			    GEN8_L3SQCREG4,
> -			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
> +		ignore_wa_write_or(wal,
> +				   GEN8_L3SQCREG4,
> +				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
> +				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
>   
>   		/* WaForwardProgressSoftReset:icl */
>   		wa_write_or(wal,
> diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h
> index 30918da180ff..42ac1fb99572 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds_types.h
> +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h
> @@ -12,9 +12,10 @@
>   #include "i915_reg.h"
>   
>   struct i915_wa {
> -	i915_reg_t	  reg;
> -	u32		  mask;
> -	u32		  val;
> +	i915_reg_t	reg;
> +	u32		mask;
> +	u32		val;
> +	u32		read;
>   };
>   
>   struct i915_wa_list {
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
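
A small self-contained illustration of the read-mask mechanism the patch
adds, in standalone C with made-up register values rather than the
driver's types: verification XORs the current value against the expected
one and masks with .read, so an entry constructed with .read == 0 is
still applied but never reported as lost.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct wa {
	uint32_t mask;	/* bits the write may touch */
	uint32_t val;	/* value to apply under mask */
	uint32_t read;	/* bits to verify on readback; 0 == skip */
};

/* Mirrors the check in the hunk above: only bits named in .read count. */
static bool wa_verify(const struct wa *wa, uint32_t cur)
{
	return ((cur ^ wa->val) & wa->read) == 0;
}

int main(void)
{
	const struct wa checked = { .mask = 0xff, .val = 0xa5, .read = 0xff };
	const struct wa ignored = { .mask = 0xff, .val = 0xa5, .read = 0 };
	uint32_t cur = 0x00;	/* pretend the HW forgot the write */

	printf("checked: %s\n", wa_verify(&checked, cur) ? "ok" : "lost");
	printf("ignored: %s\n", wa_verify(&ignored, cur) ? "ok" : "lost");
	return 0;
}

This prints "checked: lost" but "ignored: ok", which is the behaviour
wanted for the known-troublemaker registers.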

* Re: [PATCH 06/32] drm/i915: Store the default sseu setup on the engine
  2019-04-17  7:56 ` [PATCH 06/32] drm/i915: Store the default sseu setup on the engine Chris Wilson
@ 2019-04-17  9:40   ` Tvrtko Ursulin
  2019-04-24  9:45     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> As we push for better compartmentalisation, it is more convenient to
> copy the default sseu configuration from the engine into the derived
> logical context than it is to dig it out from i915->runtime_info.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   drivers/gpu/drm/i915/Makefile.header-test     |   1 +
>   drivers/gpu/drm/i915/i915_drv.h               |  14 --
>   drivers/gpu/drm/i915/i915_gem_context.c       |   2 +-
>   drivers/gpu/drm/i915/i915_perf.c              |   2 +-
>   drivers/gpu/drm/i915/intel_context.c          |   4 +-
>   drivers/gpu/drm/i915/intel_context_types.h    |  11 +-
>   drivers/gpu/drm/i915/intel_device_info.h      |  28 +---
>   drivers/gpu/drm/i915/intel_engine_cs.c        |   4 +
>   drivers/gpu/drm/i915/intel_engine_types.h     |   3 +
>   drivers/gpu/drm/i915/intel_lrc.c              | 134 +----------------
>   drivers/gpu/drm/i915/intel_lrc.h              |   2 -
>   drivers/gpu/drm/i915/intel_sseu.c             | 142 ++++++++++++++++++
>   drivers/gpu/drm/i915/intel_sseu.h             |  67 +++++++++
>   .../gpu/drm/i915/selftests/i915_gem_context.c |   5 +-
>   15 files changed, 226 insertions(+), 194 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_sseu.c
>   create mode 100644 drivers/gpu/drm/i915/intel_sseu.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index fbcb0904f4a8..53ff209b91bb 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -95,6 +95,7 @@ i915-y += \
>   	  intel_lrc.o \
>   	  intel_mocs.o \
>   	  intel_ringbuffer.o \
> +	  intel_sseu.o \
>   	  intel_uncore.o \
>   	  intel_wopcm.o
>   
> diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
> index c1c391816fa7..5bcc78d7ac96 100644
> --- a/drivers/gpu/drm/i915/Makefile.header-test
> +++ b/drivers/gpu/drm/i915/Makefile.header-test
> @@ -33,6 +33,7 @@ header_test := \
>   	intel_psr.h \
>   	intel_sdvo.h \
>   	intel_sprite.h \
> +	intel_sseu.h \
>   	intel_tv.h \
>   	intel_workarounds_types.h
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 35d0782c077e..7b5da9eddc1c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3387,20 +3387,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
>   	return (struct intel_device_info *)INTEL_INFO(dev_priv);
>   }
>   
> -static inline struct intel_sseu
> -intel_device_default_sseu(struct drm_i915_private *i915)
> -{
> -	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
> -	struct intel_sseu value = {
> -		.slice_mask = sseu->slice_mask,
> -		.subslice_mask = sseu->subslice_mask[0],
> -		.min_eus_per_subslice = sseu->max_eus_per_subslice,
> -		.max_eus_per_subslice = sseu->max_eus_per_subslice,
> -	};
> -
> -	return value;
> -}
> -
>   /* modesetting */
>   extern void intel_modeset_init_hw(struct drm_device *dev);
>   extern int intel_modeset_init(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index dd728b26b5aa..c02a30612df9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1156,7 +1156,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
>   	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
>   	*cs++ = lower_32_bits(offset);
>   	*cs++ = upper_32_bits(offset);
> -	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
> +	*cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
>   
>   	intel_ring_advance(rq, cs);
>   
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 39a4804091d7..56da457bed21 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1679,7 +1679,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
>   
>   	CTX_REG(reg_state,
>   		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> -		gen8_make_rpcs(i915, &ce->sseu));
> +		intel_sseu_make_rpcs(i915, &ce->sseu));
>   }
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
> index 8931e0fee873..961d1445833d 100644
> --- a/drivers/gpu/drm/i915/intel_context.c
> +++ b/drivers/gpu/drm/i915/intel_context.c
> @@ -230,15 +230,13 @@ intel_context_init(struct intel_context *ce,
>   	ce->gem_context = ctx;
>   	ce->engine = engine;
>   	ce->ops = engine->cops;
> +	ce->sseu = engine->sseu;
>   
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
>   
>   	mutex_init(&ce->pin_mutex);
>   
> -	/* Use the whole device by default */
> -	ce->sseu = intel_device_default_sseu(ctx->i915);
> -
>   	i915_active_request_init(&ce->active_tracker,
>   				 NULL, intel_context_retire);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
> index 68b4ca1611e0..9ec4f787c908 100644
> --- a/drivers/gpu/drm/i915/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/intel_context_types.h
> @@ -14,6 +14,7 @@
>   #include <linux/types.h>
>   
>   #include "i915_active_types.h"
> +#include "intel_sseu.h"
>   
>   struct i915_gem_context;
>   struct i915_vma;
> @@ -28,16 +29,6 @@ struct intel_context_ops {
>   	void (*destroy)(struct kref *kref);
>   };
>   
> -/*
> - * Powergating configuration for a particular (context,engine).
> - */
> -struct intel_sseu {
> -	u8 slice_mask;
> -	u8 subslice_mask;
> -	u8 min_eus_per_subslice;
> -	u8 max_eus_per_subslice;
> -};
> -
>   struct intel_context {
>   	struct kref ref;
>   
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 0e579f158016..3045e0dee2a1 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -29,6 +29,7 @@
>   
>   #include "intel_engine_types.h"
>   #include "intel_display.h"
> +#include "intel_sseu.h"
>   
>   struct drm_printer;
>   struct drm_i915_private;
> @@ -139,33 +140,6 @@ enum intel_ppgtt_type {
>   	func(overlay_needs_physical); \
>   	func(supports_tv);
>   
> -#define GEN_MAX_SLICES		(6) /* CNL upper bound */
> -#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
> -
> -struct sseu_dev_info {
> -	u8 slice_mask;
> -	u8 subslice_mask[GEN_MAX_SLICES];
> -	u16 eu_total;
> -	u8 eu_per_subslice;
> -	u8 min_eu_in_pool;
> -	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
> -	u8 subslice_7eu[3];
> -	u8 has_slice_pg:1;
> -	u8 has_subslice_pg:1;
> -	u8 has_eu_pg:1;
> -
> -	/* Topology fields */
> -	u8 max_slices;
> -	u8 max_subslices;
> -	u8 max_eus_per_subslice;
> -
> -	/* We don't have more than 8 eus per subslice at the moment and as we
> -	 * store eus enabled using bits, no need to multiply by eus per
> -	 * subslice.
> -	 */
> -	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
> -};
> -
>   struct intel_device_info {
>   	u16 gen_mask;
>   
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index eea9bec04f1b..ad2a683d97f7 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -588,6 +588,10 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
>   	intel_engine_init_batch_pool(engine);
>   	intel_engine_init_cmd_parser(engine);
>   
> +	/* Use the whole device by default */
> +	engine->sseu =
> +		intel_device_default_sseu(&RUNTIME_INFO(engine->i915)->sseu);
> +
>   	return 0;
>   
>   err_hwsp:
> diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
> index 1f970c76b6a6..d07a01b3ed0b 100644
> --- a/drivers/gpu/drm/i915/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/intel_engine_types.h
> @@ -17,6 +17,7 @@
>   #include "i915_priolist_types.h"
>   #include "i915_selftest.h"
>   #include "i915_timeline_types.h"
> +#include "intel_sseu.h"
>   #include "intel_workarounds_types.h"
>   
>   #include "i915_gem_batch_pool.h"
> @@ -278,6 +279,8 @@ struct intel_engine_cs {
>   	u32 context_size;
>   	u32 mmio_base;
>   
> +	struct intel_sseu sseu;
> +
>   	struct intel_ring *buffer;
>   
>   	struct i915_timeline timeline;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4e0a351bfbca..18a9dc6ca877 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1232,7 +1232,7 @@ __execlists_update_reg_state(struct intel_context *ce,
>   	/* RPCS */
>   	if (engine->class == RENDER_CLASS)
>   		regs[CTX_R_PWR_CLK_STATE + 1] =
> -			gen8_make_rpcs(engine->i915, &ce->sseu);
> +			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
>   }
>   
>   static int
> @@ -2551,138 +2551,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
>   	return logical_ring_init(engine);
>   }
>   
> -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
> -{
> -	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
> -	bool subslice_pg = sseu->has_subslice_pg;
> -	struct intel_sseu ctx_sseu;
> -	u8 slices, subslices;
> -	u32 rpcs = 0;
> -
> -	/*
> -	 * No explicit RPCS request is needed to ensure full
> -	 * slice/subslice/EU enablement prior to Gen9.
> -	*/
> -	if (INTEL_GEN(i915) < 9)
> -		return 0;
> -
> -	/*
> -	 * If i915/perf is active, we want a stable powergating configuration
> -	 * on the system.
> -	 *
> -	 * We could choose full enablement, but on ICL we know there are use
> -	 * cases which disable slices for functional reasons, and not just
> -	 * for performance. So in this case we select a known stable subset.
> -	 */
> -	if (!i915->perf.oa.exclusive_stream) {
> -		ctx_sseu = *req_sseu;
> -	} else {
> -		ctx_sseu = intel_device_default_sseu(i915);
> -
> -		if (IS_GEN(i915, 11)) {
> -			/*
> -			 * We only need subslice count so it doesn't matter
> -			 * which ones we select - just keep the low bits so that
> -			 * half of all available subslices per slice remain enabled.
> -			 */
> -			ctx_sseu.subslice_mask =
> -				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
> -			ctx_sseu.slice_mask = 0x1;
> -		}
> -	}
> -
> -	slices = hweight8(ctx_sseu.slice_mask);
> -	subslices = hweight8(ctx_sseu.subslice_mask);
> -
> -	/*
> -	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
> -	 * wide and Icelake has up to eight subslices, special programming is
> -	 * needed in order to correctly enable all subslices.
> -	 *
> -	 * According to documentation software must consider the configuration
> -	 * as 2x4x8 and hardware will translate this to 1x8x8.
> -	 *
> -	 * Furthermore, even though SScount is three bits, the maximum documented
> -	 * value for it is four. From this some rules/restrictions follow:
> -	 *
> -	 * 1.
> -	 * If enabled subslice count is greater than four, two whole slices must
> -	 * be enabled instead.
> -	 *
> -	 * 2.
> -	 * When more than one slice is enabled, hardware ignores the subslice
> -	 * count altogether.
> -	 *
> -	 * From these restrictions it follows that it is not possible to enable
> -	 * a subslice count between the SScount maximum of four and the
> -	 * maximum number available on a particular SKU. Either all
> -	 * subslices are enabled, or a count between one and four on the first
> -	 * slice.
> -	 */
> -	if (IS_GEN(i915, 11) &&
> -	    slices == 1 &&
> -	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
> -		GEM_BUG_ON(subslices & 1);
> -
> -		subslice_pg = false;
> -		slices *= 2;
> -	}
> -
> -	/*
> -	 * Starting in Gen9, render power gating can leave
> -	 * slice/subslice/EU in a partially enabled state. We
> -	 * must make an explicit request through RPCS for full
> -	 * enablement.
> -	*/
> -	if (sseu->has_slice_pg) {
> -		u32 mask, val = slices;
> -
> -		if (INTEL_GEN(i915) >= 11) {
> -			mask = GEN11_RPCS_S_CNT_MASK;
> -			val <<= GEN11_RPCS_S_CNT_SHIFT;
> -		} else {
> -			mask = GEN8_RPCS_S_CNT_MASK;
> -			val <<= GEN8_RPCS_S_CNT_SHIFT;
> -		}
> -
> -		GEM_BUG_ON(val & ~mask);
> -		val &= mask;
> -
> -		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
> -	}
> -
> -	if (subslice_pg) {
> -		u32 val = subslices;
> -
> -		val <<= GEN8_RPCS_SS_CNT_SHIFT;
> -
> -		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
> -		val &= GEN8_RPCS_SS_CNT_MASK;
> -
> -		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
> -	}
> -
> -	if (sseu->has_eu_pg) {
> -		u32 val;
> -
> -		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
> -		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
> -		val &= GEN8_RPCS_EU_MIN_MASK;
> -
> -		rpcs |= val;
> -
> -		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
> -		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
> -		val &= GEN8_RPCS_EU_MAX_MASK;
> -
> -		rpcs |= val;
> -
> -		rpcs |= GEN8_RPCS_ENABLE;
> -	}
> -
> -	return rpcs;
> -}
> -
>   static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
>   {
>   	u32 indirect_ctx_offset;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 84aa230ea27b..99f75ee9d087 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -115,6 +115,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   							const char *prefix),
>   				   unsigned int max);
>   
> -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
> -
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/intel_sseu.c
> new file mode 100644
> index 000000000000..cfc80813f662
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_sseu.c
> @@ -0,0 +1,142 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_lrc_reg.h"
> +#include "intel_sseu.h"
> +
> +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
> +			 const struct intel_sseu *req_sseu)
> +{
> +	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
> +	bool subslice_pg = sseu->has_subslice_pg;
> +	struct intel_sseu ctx_sseu;
> +	u8 slices, subslices;
> +	u32 rpcs = 0;
> +
> +	/*
> +	 * No explicit RPCS request is needed to ensure full
> +	 * slice/subslice/EU enablement prior to Gen9.
> +	 */
> +	if (INTEL_GEN(i915) < 9)
> +		return 0;
> +
> +	/*
> +	 * If i915/perf is active, we want a stable powergating configuration
> +	 * on the system.
> +	 *
> +	 * We could choose full enablement, but on ICL we know there are use
> +	 * cases which disable slices for functional reasons, and not just
> +	 * for performance. So in this case we select a known stable subset.
> +	 */
> +	if (!i915->perf.oa.exclusive_stream) {
> +		ctx_sseu = *req_sseu;
> +	} else {
> +		ctx_sseu = intel_device_default_sseu(sseu);
> +
> +		if (IS_GEN(i915, 11)) {
> +			/*
> +			 * We only need subslice count so it doesn't matter
> +			 * which ones we select - just keep the low bits so that
> +			 * half of all available subslices per slice remain enabled.
> +			 */
> +			ctx_sseu.subslice_mask =
> +				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
> +			ctx_sseu.slice_mask = 0x1;
> +		}
> +	}
> +
> +	slices = hweight8(ctx_sseu.slice_mask);
> +	subslices = hweight8(ctx_sseu.subslice_mask);
> +
> +	/*
> +	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
> +	 * wide and Icelake has up to eight subslices, special programming is
> +	 * needed in order to correctly enable all subslices.
> +	 *
> +	 * According to documentation software must consider the configuration
> +	 * as 2x4x8 and hardware will translate this to 1x8x8.
> +	 *
> +	 * Furthermore, even though SScount is three bits, the maximum documented
> +	 * value for it is four. From this some rules/restrictions follow:
> +	 *
> +	 * 1.
> +	 * If enabled subslice count is greater than four, two whole slices must
> +	 * be enabled instead.
> +	 *
> +	 * 2.
> +	 * When more than one slice is enabled, hardware ignores the subslice
> +	 * count altogether.
> +	 *
> +	 * From these restrictions it follows that it is not possible to enable
> +	 * a subslice count between the SScount maximum of four and the
> +	 * maximum number available on a particular SKU. Either all
> +	 * subslices are enabled, or a count between one and four on the first
> +	 * slice.
> +	 */
> +	if (IS_GEN(i915, 11) &&
> +	    slices == 1 &&
> +	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
> +		GEM_BUG_ON(subslices & 1);
> +
> +		subslice_pg = false;
> +		slices *= 2;
> +	}
> +
> +	/*
> +	 * Starting in Gen9, render power gating can leave
> +	 * slice/subslice/EU in a partially enabled state. We
> +	 * must make an explicit request through RPCS for full
> +	 * enablement.
> +	 */
> +	if (sseu->has_slice_pg) {
> +		u32 mask, val = slices;
> +
> +		if (INTEL_GEN(i915) >= 11) {
> +			mask = GEN11_RPCS_S_CNT_MASK;
> +			val <<= GEN11_RPCS_S_CNT_SHIFT;
> +		} else {
> +			mask = GEN8_RPCS_S_CNT_MASK;
> +			val <<= GEN8_RPCS_S_CNT_SHIFT;
> +		}
> +
> +		GEM_BUG_ON(val & ~mask);
> +		val &= mask;
> +
> +		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
> +	}
> +
> +	if (subslice_pg) {
> +		u32 val = subslices;
> +
> +		val <<= GEN8_RPCS_SS_CNT_SHIFT;
> +
> +		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
> +		val &= GEN8_RPCS_SS_CNT_MASK;
> +
> +		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
> +	}
> +
> +	if (sseu->has_eu_pg) {
> +		u32 val;
> +
> +		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
> +		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
> +		val &= GEN8_RPCS_EU_MIN_MASK;
> +
> +		rpcs |= val;
> +
> +		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
> +		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
> +		val &= GEN8_RPCS_EU_MAX_MASK;
> +
> +		rpcs |= val;
> +
> +		rpcs |= GEN8_RPCS_ENABLE;
> +	}
> +
> +	return rpcs;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/intel_sseu.h
> new file mode 100644
> index 000000000000..bf6fa019fd00
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_sseu.h
> @@ -0,0 +1,67 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_SSEU_H__
> +#define __INTEL_SSEU_H__
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_private;
> +
> +#define GEN_MAX_SLICES		(6) /* CNL upper bound */
> +#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
> +
> +struct sseu_dev_info {
> +	u8 slice_mask;
> +	u8 subslice_mask[GEN_MAX_SLICES];
> +	u16 eu_total;
> +	u8 eu_per_subslice;
> +	u8 min_eu_in_pool;
> +	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
> +	u8 subslice_7eu[3];
> +	u8 has_slice_pg:1;
> +	u8 has_subslice_pg:1;
> +	u8 has_eu_pg:1;
> +
> +	/* Topology fields */
> +	u8 max_slices;
> +	u8 max_subslices;
> +	u8 max_eus_per_subslice;
> +
> +	/* We don't have more than 8 eus per subslice at the moment and as we
> +	 * store eus enabled using bits, no need to multiply by eus per
> +	 * subslice.
> +	 */
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
> +};
> +
> +/*
> + * Powergating configuration for a particular (context,engine).
> + */
> +struct intel_sseu {
> +	u8 slice_mask;
> +	u8 subslice_mask;
> +	u8 min_eus_per_subslice;
> +	u8 max_eus_per_subslice;
> +};
> +
> +static inline struct intel_sseu
> +intel_device_default_sseu(const struct sseu_dev_info *sseu)

As said before, if you rename this function to intel_device_sseu, 
intel_convert_device_sseu, or something similar, I would be fine with 
the patch. Basically I object to the "default" in the name, since the 
sseu is passed in and could be any configuration.

Regards,

Tvrtko

> +{
> +	struct intel_sseu value = {
> +		.slice_mask = sseu->slice_mask,
> +		.subslice_mask = sseu->subslice_mask[0],
> +		.min_eus_per_subslice = sseu->max_eus_per_subslice,
> +		.max_eus_per_subslice = sseu->max_eus_per_subslice,
> +	};
> +
> +	return value;
> +}
> +
> +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
> +			 const struct intel_sseu *req_sseu);
> +
> +#endif /* __INTEL_SSEU_H__ */
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 4e1b6efc6b22..e1cb22f03e8e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -962,8 +962,7 @@ __sseu_finish(struct drm_i915_private *i915,
>   	      unsigned int expected,
>   	      struct igt_spinner *spin)
>   {
> -	unsigned int slices =
> -		hweight32(intel_device_default_sseu(i915).slice_mask);
> +	unsigned int slices = hweight32(engine->sseu.slice_mask);
>   	u32 rpcs = 0;
>   	int ret = 0;
>   
> @@ -1047,8 +1046,8 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
>   	       const char *name,
>   	       unsigned int flags)
>   {
> -	struct intel_sseu default_sseu = intel_device_default_sseu(i915);
>   	struct intel_engine_cs *engine = i915->engine[RCS0];
> +	struct intel_sseu default_sseu = engine->sseu;
>   	struct drm_i915_gem_object *obj;
>   	struct i915_gem_context *ctx;
>   	struct intel_sseu pg_sseu;
> 
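
A standalone numeric walk-through of two of the tricks in the rpcs helper
quoted above, in plain C with illustrative values (an assumed 1-slice,
8-subslice part) rather than driver code: halving the subslice mask when
perf needs a stable configuration, and the ICL rule that more than four
subslices on one slice must instead be programmed as two whole slices.

#include <stdint.h>
#include <stdio.h>

static unsigned int hweight8(uint8_t v)
{
	return (unsigned int)__builtin_popcount(v);	/* assumes gcc/clang */
}

int main(void)
{
	uint8_t subslice_mask = 0xff;	/* 8 subslices on one slice */

	/* Only the count matters, so keep the low half of the bits. */
	uint8_t half = (uint8_t)~(~0u << (hweight8(subslice_mask) / 2));
	printf("halved mask: 0x%02x (%u subslices)\n",
	       (unsigned int)half, hweight8(half));

	/* SScount caps at 4: >4 subslices on one slice is expressed as
	 * two whole slices, with subslice powergating disabled. */
	unsigned int slices = 1, subslices = hweight8(subslice_mask);
	if (subslices > 4)
		printf("program as %ux%u, subslice count ignored by HW\n",
		       slices * 2, subslices);
	return 0;
}

Output: the halved mask is 0x0f (4 subslices), and the 1x8 request is
programmed as 2x8.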

* Re: [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/
  2019-04-17  7:56 ` [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/ Chris Wilson
@ 2019-04-17  9:42   ` Tvrtko Ursulin
  2019-04-18 12:04   ` Joonas Lahtinen
  1 sibling, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Start partitioning off the code that talks to the hardware (GT) from the
> uapi layers and move the device-facing code under gt/
> 
> One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to
> subdivide that header and body further (and split out the submission
> code from the ringbuffer and logical context handling). This patch aims
> to be simple code motion so git can fix up in-flight patches with little mess.

I would rather skip this for the time being since I worry about the 
impact on other ongoing work. But if other guys want to ack it I won't 
object.

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                 | 46 ++++++++++++-------
>   drivers/gpu/drm/i915/Makefile.header-test     |  6 +--
>   drivers/gpu/drm/i915/gt/Makefile              |  2 +
>   drivers/gpu/drm/i915/gt/Makefile.header-test  | 16 +++++++
>   .../gpu/drm/i915/{ => gt}/intel_breadcrumbs.c |  0
>   drivers/gpu/drm/i915/{ => gt}/intel_context.c |  3 +-
>   drivers/gpu/drm/i915/{ => gt}/intel_context.h |  0
>   .../drm/i915/{ => gt}/intel_context_types.h   |  0
>   .../{intel_ringbuffer.h => gt/intel_engine.h} |  0
>   .../gpu/drm/i915/{ => gt}/intel_engine_cs.c   |  8 ++--
>   .../drm/i915/{ => gt}/intel_engine_types.h    |  5 +-
>   .../drm/i915/{ => gt}/intel_gpu_commands.h    |  0
>   .../gpu/drm/i915/{ => gt}/intel_hangcheck.c   |  4 +-
>   drivers/gpu/drm/i915/{ => gt}/intel_lrc.c     |  5 +-
>   drivers/gpu/drm/i915/{ => gt}/intel_lrc.h     |  4 +-
>   drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h |  0
>   drivers/gpu/drm/i915/{ => gt}/intel_mocs.c    |  4 +-
>   drivers/gpu/drm/i915/{ => gt}/intel_mocs.h    |  4 +-
>   .../i915/{i915_reset.c => gt/intel_reset.c}   |  2 +-
>   .../i915/{i915_reset.h => gt/intel_reset.h}   |  2 +-
>   .../gpu/drm/i915/{ => gt}/intel_ringbuffer.c  |  3 +-
>   drivers/gpu/drm/i915/{ => gt}/intel_sseu.c    |  0
>   drivers/gpu/drm/i915/{ => gt}/intel_sseu.h    |  0
>   .../gpu/drm/i915/{ => gt}/intel_workarounds.c |  2 +-
>   .../gpu/drm/i915/{ => gt}/intel_workarounds.h |  8 +++-
>   .../i915/{ => gt}/intel_workarounds_types.h   |  0
>   .../drm/i915/{selftests => gt}/mock_engine.c  | 10 ++--
>   .../drm/i915/{selftests => gt}/mock_engine.h  |  2 +-
>   .../selftest_engine_cs.c}                     |  0
>   .../selftest_hangcheck.c}                     | 16 +++----
>   .../intel_lrc.c => gt/selftest_lrc.c}         | 16 +++----
>   .../selftest_workarounds.c}                   | 18 ++++----
>   drivers/gpu/drm/i915/i915_cmd_parser.c        |  3 +-
>   drivers/gpu/drm/i915/i915_debugfs.c           |  3 +-
>   drivers/gpu/drm/i915/i915_drv.c               |  5 +-
>   drivers/gpu/drm/i915/i915_drv.h               |  7 +--
>   drivers/gpu/drm/i915/i915_gem.c               |  7 +--
>   drivers/gpu/drm/i915/i915_gem_context.c       |  7 ++-
>   drivers/gpu/drm/i915/i915_gem_context.h       |  3 +-
>   drivers/gpu/drm/i915/i915_gem_context_types.h |  3 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 -
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  2 +-
>   drivers/gpu/drm/i915/i915_gpu_error.h         |  3 +-
>   drivers/gpu/drm/i915/i915_perf.c              |  3 +-
>   drivers/gpu/drm/i915/i915_pmu.c               |  4 +-
>   drivers/gpu/drm/i915/i915_request.c           |  1 -
>   drivers/gpu/drm/i915/i915_scheduler_types.h   |  2 +-
>   drivers/gpu/drm/i915/i915_trace.h             |  3 +-
>   drivers/gpu/drm/i915/i915_vma.c               |  3 +-
>   drivers/gpu/drm/i915/intel_device_info.h      |  6 ++-
>   drivers/gpu/drm/i915/intel_display.c          |  1 -
>   drivers/gpu/drm/i915/intel_guc_submission.c   |  3 +-
>   drivers/gpu/drm/i915/intel_guc_submission.h   |  3 +-
>   drivers/gpu/drm/i915/intel_uc.c               |  2 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  5 +-
>   drivers/gpu/drm/i915/selftests/igt_reset.c    |  3 +-
>   drivers/gpu/drm/i915/selftests/igt_spinner.h  |  3 +-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
>   drivers/gpu/drm/i915/selftests/mock_request.c |  3 +-
>   59 files changed, 166 insertions(+), 112 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/Makefile
>   create mode 100644 drivers/gpu/drm/i915/gt/Makefile.header-test
>   rename drivers/gpu/drm/i915/{ => gt}/intel_breadcrumbs.c (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_context.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_context.h (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_context_types.h (100%)
>   rename drivers/gpu/drm/i915/{intel_ringbuffer.h => gt/intel_engine.h} (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_engine_cs.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_engine_types.h (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_gpu_commands.h (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_hangcheck.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.h (98%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.h (97%)
>   rename drivers/gpu/drm/i915/{i915_reset.c => gt/intel_reset.c} (99%)
>   rename drivers/gpu/drm/i915/{i915_reset.h => gt/intel_reset.h} (98%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_ringbuffer.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.c (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.h (100%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.c (99%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.h (88%)
>   rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds_types.h (100%)
>   rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.c (97%)
>   rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.h (98%)
>   rename drivers/gpu/drm/i915/{selftests/intel_engine_cs.c => gt/selftest_engine_cs.c} (100%)
>   rename drivers/gpu/drm/i915/{selftests/intel_hangcheck.c => gt/selftest_hangcheck.c} (99%)
>   rename drivers/gpu/drm/i915/{selftests/intel_lrc.c => gt/selftest_lrc.c} (99%)
>   rename drivers/gpu/drm/i915/{selftests/intel_workarounds.c => gt/selftest_workarounds.c} (98%)
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 53ff209b91bb..40130cf5c003 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,32 +35,53 @@ subdir-ccflags-y += \
>   # Extra header tests
>   include $(src)/Makefile.header-test
>   
> +subdir-ccflags-y += -I$(src)
> +
>   # Please keep these build lists sorted!
>   
>   # core driver code
>   i915-y += i915_drv.o \
>   	  i915_irq.o \
> -	  i915_memcpy.o \
> -	  i915_mm.o \
>   	  i915_params.o \
>   	  i915_pci.o \
> -	  i915_reset.o \
>   	  i915_suspend.o \
> -	  i915_sw_fence.o \
> -	  i915_syncmap.o \
>   	  i915_sysfs.o \
> -	  i915_user_extensions.o \
>   	  intel_csr.o \
>   	  intel_device_info.o \
>   	  intel_pm.o \
>   	  intel_runtime_pm.o \
> -	  intel_workarounds.o
> +	  intel_uncore.o
> +
> +# core library code
> +i915-y += \
> +	i915_memcpy.o \
> +	i915_mm.o \
> +	i915_sw_fence.o \
> +	i915_syncmap.o \
> +	i915_user_extensions.o
>   
>   i915-$(CONFIG_COMPAT)   += i915_ioc32.o
>   i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
>   i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>   
> -# GEM code
> +# "Graphics Technology" (aka we talk to the gpu)
> +obj-y += gt/
> +gt-y += \
> +	gt/intel_breadcrumbs.o \
> +	gt/intel_context.o \
> +	gt/intel_engine_cs.o \
> +	gt/intel_hangcheck.o \
> +	gt/intel_lrc.o \
> +	gt/intel_reset.o \
> +	gt/intel_ringbuffer.o \
> +	gt/intel_mocs.o \
> +	gt/intel_sseu.o \
> +	gt/intel_workarounds.o
> +gt-$(CONFIG_DRM_I915_SELFTEST) += \
> +	gt/mock_engine.o
> +i915-y += $(gt-y)
> +
> +# GEM (Graphics Execution Management) code
>   i915-y += \
>   	  i915_active.o \
>   	  i915_cmd_parser.o \
> @@ -88,15 +109,6 @@ i915-y += \
>   	  i915_timeline.o \
>   	  i915_trace_points.o \
>   	  i915_vma.o \
> -	  intel_breadcrumbs.o \
> -	  intel_context.o \
> -	  intel_engine_cs.o \
> -	  intel_hangcheck.o \
> -	  intel_lrc.o \
> -	  intel_mocs.o \
> -	  intel_ringbuffer.o \
> -	  intel_sseu.o \
> -	  intel_uncore.o \
>   	  intel_wopcm.o
>   
>   # general-purpose microcontroller (GuC) support
> diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
> index 5bcc78d7ac96..96a5d90629ec 100644
> --- a/drivers/gpu/drm/i915/Makefile.header-test
> +++ b/drivers/gpu/drm/i915/Makefile.header-test
> @@ -13,13 +13,11 @@ header_test := \
>   	intel_cdclk.h \
>   	intel_color.h \
>   	intel_connector.h \
> -	intel_context_types.h \
>   	intel_crt.h \
>   	intel_csr.h \
>   	intel_ddi.h \
>   	intel_dp.h \
>   	intel_dvo.h \
> -	intel_engine_types.h \
>   	intel_fbc.h \
>   	intel_fbdev.h \
>   	intel_frontbuffer.h \
> @@ -33,9 +31,7 @@ header_test := \
>   	intel_psr.h \
>   	intel_sdvo.h \
>   	intel_sprite.h \
> -	intel_sseu.h \
> -	intel_tv.h \
> -	intel_workarounds_types.h
> +	intel_tv.h
>   
>   quiet_cmd_header_test = HDRTEST $@
>         cmd_header_test = echo "\#include \"$(<F)\"" > $@
> diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
> new file mode 100644
> index 000000000000..1c75b5c9790c
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/Makefile
> @@ -0,0 +1,2 @@
> +# Extra header tests
> +include $(src)/Makefile.header-test
> diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test
> new file mode 100644
> index 000000000000..61e06cbb4b32
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/Makefile.header-test
> @@ -0,0 +1,16 @@
> +# SPDX-License-Identifier: MIT
> +# Copyright © 2019 Intel Corporation
> +
> +# Test the headers are compilable as standalone units
> +header_test := $(notdir $(wildcard $(src)/*.h))
> +
> +quiet_cmd_header_test = HDRTEST $@
> +      cmd_header_test = echo "\#include \"$(<F)\"" > $@
> +
> +header_test_%.c: %.h
> +	$(call cmd,header_test)
> +
> +extra-$(CONFIG_DRM_I915_WERROR) += \
> +	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
> +
> +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_breadcrumbs.c
> rename to drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_context.c
> rename to drivers/gpu/drm/i915/gt/intel_context.c
> index 961d1445833d..ebd1e5919a4a 100644
> --- a/drivers/gpu/drm/i915/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -7,8 +7,9 @@
>   #include "i915_drv.h"
>   #include "i915_gem_context.h"
>   #include "i915_globals.h"
> +
>   #include "intel_context.h"
> -#include "intel_ringbuffer.h"
> +#include "intel_engine.h"
>   
>   static struct i915_global_context {
>   	struct i915_global base;
> diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_context.h
> rename to drivers/gpu/drm/i915/gt/intel_context.h
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_context_types.h
> rename to drivers/gpu/drm/i915/gt/intel_context_types.h
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_ringbuffer.h
> rename to drivers/gpu/drm/i915/gt/intel_engine.h
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_engine_cs.c
> rename to drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index ad2a683d97f7..21dd3f25e641 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -25,9 +25,10 @@
>   #include <drm/drm_print.h>
>   
>   #include "i915_drv.h"
> -#include "i915_reset.h"
> -#include "intel_ringbuffer.h"
> +
> +#include "intel_engine.h"
>   #include "intel_lrc.h"
> +#include "intel_reset.h"
>   
>   /* Haswell does have the CXT_SIZE register however it does not appear to be
>    * valid. Now, docs explain in dwords what is in the context object. The full
> @@ -1756,6 +1757,5 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/mock_engine.c"
> -#include "selftests/intel_engine_cs.c"
> +#include "selftest_engine_cs.c"
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_engine_types.h
> rename to drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d07a01b3ed0b..3adf58da6d2c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -14,15 +14,14 @@
>   #include <linux/types.h>
>   
>   #include "i915_gem.h"
> +#include "i915_gem_batch_pool.h"
> +#include "i915_pmu.h"
>   #include "i915_priolist_types.h"
>   #include "i915_selftest.h"
>   #include "i915_timeline_types.h"
>   #include "intel_sseu.h"
>   #include "intel_workarounds_types.h"
>   
> -#include "i915_gem_batch_pool.h"
> -#include "i915_pmu.h"
> -
>   #define I915_MAX_SLICES	3
>   #define I915_MAX_SUBSLICES 8
>   
> diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_gpu_commands.h
> rename to drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_hangcheck.c
> rename to drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index 3d51ed1428d4..3053a706a561 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -22,8 +22,8 @@
>    *
>    */
>   
> +#include "intel_reset.h"
>   #include "i915_drv.h"
> -#include "i915_reset.h"
>   
>   struct hangcheck {
>   	u64 acthd;
> @@ -330,5 +330,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915)
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/intel_hangcheck.c"
> +#include "selftest_hangcheck.c"
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_lrc.c
> rename to drivers/gpu/drm/i915/gt/intel_lrc.c
> index 18a9dc6ca877..5cadf8f6a23d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -133,13 +133,12 @@
>    */
>   #include <linux/interrupt.h>
>   
> -#include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
> -#include "i915_reset.h"
>   #include "i915_vgpu.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
> +#include "intel_reset.h"
>   #include "intel_workarounds.h"
>   
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
> @@ -2905,5 +2904,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/intel_lrc.c"
> +#include "selftest_lrc.c"
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> similarity index 98%
> rename from drivers/gpu/drm/i915/intel_lrc.h
> rename to drivers/gpu/drm/i915/gt/intel_lrc.h
> index 99f75ee9d087..1a33ec74af8c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -24,8 +24,7 @@
>   #ifndef _INTEL_LRC_H_
>   #define _INTEL_LRC_H_
>   
> -#include "intel_ringbuffer.h"
> -#include "i915_gem_context.h"
> +#include "intel_engine.h"
>   
>   /* Execlists regs */
>   #define RING_ELSP(base)				_MMIO((base) + 0x230)
> @@ -99,7 +98,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
>   struct drm_printer;
>   
>   struct drm_i915_private;
> -struct i915_gem_context;
>   
>   void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_lrc_reg.h
> rename to drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_mocs.c
> rename to drivers/gpu/drm/i915/gt/intel_mocs.c
> index 274ba78500c0..79df66022d3a 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -20,9 +20,11 @@
>    * SOFTWARE.
>    */
>   
> +#include "i915_drv.h"
> +
> +#include "intel_engine.h"
>   #include "intel_mocs.h"
>   #include "intel_lrc.h"
> -#include "intel_ringbuffer.h"
>   
>   /* structures required */
>   struct drm_i915_mocs_entry {
> diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> similarity index 97%
> rename from drivers/gpu/drm/i915/intel_mocs.h
> rename to drivers/gpu/drm/i915/gt/intel_mocs.h
> index 3d99d1271b2b..0913704a1af2 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> @@ -49,7 +49,9 @@
>    * context handling keep the MOCS in step.
>    */
>   
> -#include "i915_drv.h"
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_engine_cs;
>   
>   int intel_rcs_context_init_mocs(struct i915_request *rq);
>   void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/i915_reset.c
> rename to drivers/gpu/drm/i915/gt/intel_reset.c
> index 677d59304e78..9731a2295639 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -9,7 +9,7 @@
>   
>   #include "i915_drv.h"
>   #include "i915_gpu_error.h"
> -#include "i915_reset.h"
> +#include "intel_reset.h"
>   
>   #include "intel_guc.h"
>   
> diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> similarity index 98%
> rename from drivers/gpu/drm/i915/i915_reset.h
> rename to drivers/gpu/drm/i915/gt/intel_reset.h
> index 3c0450289b8f..8e662bb43a9b 100644
> --- a/drivers/gpu/drm/i915/i915_reset.h
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> @@ -11,7 +11,7 @@
>   #include <linux/types.h>
>   #include <linux/srcu.h>
>   
> -#include "intel_engine_types.h"
> +#include "gt/intel_engine_types.h"
>   
>   struct drm_i915_private;
>   struct i915_request;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_ringbuffer.c
> rename to drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 029fd8ec1857..c1214fd25702 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -33,9 +33,8 @@
>   
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
> -#include "i915_reset.h"
>   #include "i915_trace.h"
> -#include "intel_drv.h"
> +#include "intel_reset.h"
>   #include "intel_workarounds.h"
>   
>   /* Rough estimate of the typical request size, performing a flush,
> diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_sseu.c
> rename to drivers/gpu/drm/i915/gt/intel_sseu.c
> diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_sseu.h
> rename to drivers/gpu/drm/i915/gt/intel_sseu.h
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_workarounds.c
> rename to drivers/gpu/drm/i915/gt/intel_workarounds.c
> index b3cbed1ee1c9..f46ed0e2f07c 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1398,5 +1398,5 @@ int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/intel_workarounds.c"
> +#include "selftest_workarounds.c"
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
> similarity index 88%
> rename from drivers/gpu/drm/i915/intel_workarounds.h
> rename to drivers/gpu/drm/i915/gt/intel_workarounds.h
> index fdf7ebb90f28..3761a6ee58bb 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.h
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
> @@ -4,13 +4,17 @@
>    * Copyright © 2014-2018 Intel Corporation
>    */
>   
> -#ifndef _I915_WORKAROUNDS_H_
> -#define _I915_WORKAROUNDS_H_
> +#ifndef _INTEL_WORKAROUNDS_H_
> +#define _INTEL_WORKAROUNDS_H_
>   
>   #include <linux/slab.h>
>   
>   #include "intel_workarounds_types.h"
>   
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_engine_cs;
> +
>   static inline void intel_wa_list_free(struct i915_wa_list *wal)
>   {
>   	kfree(wal->list);
> diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_workarounds_types.h
> rename to drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> similarity index 97%
> rename from drivers/gpu/drm/i915/selftests/mock_engine.c
> rename to drivers/gpu/drm/i915/gt/mock_engine.c
> index 61a8206ed677..414afd2f27fe 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -22,8 +22,11 @@
>    *
>    */
>   
> +#include "i915_drv.h"
> +#include "intel_context.h"
> +
>   #include "mock_engine.h"
> -#include "mock_request.h"
> +#include "selftests/mock_request.h"
>   
>   struct mock_ring {
>   	struct intel_ring base;
> @@ -268,8 +271,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
>   	INIT_LIST_HEAD(&engine->hw_queue);
>   
> -	if (pin_context(i915->kernel_context, &engine->base,
> -			&engine->base.kernel_context))
> +	engine->base.kernel_context =
> +		intel_context_pin(i915->kernel_context, &engine->base);
> +	if (IS_ERR(engine->base.kernel_context))
>   		goto err_breadcrumbs;
>   
>   	return &engine->base;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
> similarity index 98%
> rename from drivers/gpu/drm/i915/selftests/mock_engine.h
> rename to drivers/gpu/drm/i915/gt/mock_engine.h
> index b9cc3a245f16..44b35a85e9d1 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.h
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.h
> @@ -29,7 +29,7 @@
>   #include <linux/spinlock.h>
>   #include <linux/timer.h>
>   
> -#include "../intel_ringbuffer.h"
> +#include "gt/intel_engine.h"
>   
>   struct mock_engine {
>   	struct intel_engine_cs base;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> similarity index 100%
> rename from drivers/gpu/drm/i915/selftests/intel_engine_cs.c
> rename to drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> rename to drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 050bd1e19e02..87c26920212f 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -24,14 +24,14 @@
>   
>   #include <linux/kthread.h>
>   
> -#include "../i915_selftest.h"
> -#include "i915_random.h"
> -#include "igt_flush_test.h"
> -#include "igt_reset.h"
> -#include "igt_wedge_me.h"
> -
> -#include "mock_context.h"
> -#include "mock_drm.h"
> +#include "i915_selftest.h"
> +#include "selftests/i915_random.h"
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/igt_reset.h"
> +#include "selftests/igt_wedge_me.h"
> +
> +#include "selftests/mock_context.h"
> +#include "selftests/mock_drm.h"
>   
>   #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
>   
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/selftests/intel_lrc.c
> rename to drivers/gpu/drm/i915/gt/selftest_lrc.c
> index fbee030db940..cd0551f97c2f 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -6,15 +6,13 @@
>   
>   #include <linux/prime_numbers.h>
>   
> -#include "../i915_reset.h"
> -
> -#include "../i915_selftest.h"
> -#include "igt_flush_test.h"
> -#include "igt_live_test.h"
> -#include "igt_spinner.h"
> -#include "i915_random.h"
> -
> -#include "mock_context.h"
> +#include "gt/intel_reset.h"
> +#include "i915_selftest.h"
> +#include "selftests/i915_random.h"
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/igt_live_test.h"
> +#include "selftests/igt_spinner.h"
> +#include "selftests/mock_context.h"
>   
>   static int live_sanitycheck(void *arg)
>   {
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> similarity index 98%
> rename from drivers/gpu/drm/i915/selftests/intel_workarounds.c
> rename to drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 6f941c31dcab..96c6282f3a10 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -4,15 +4,15 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> -#include "../i915_selftest.h"
> -#include "../i915_reset.h"
> -
> -#include "igt_flush_test.h"
> -#include "igt_reset.h"
> -#include "igt_spinner.h"
> -#include "igt_wedge_me.h"
> -#include "mock_context.h"
> -#include "mock_drm.h"
> +#include "i915_selftest.h"
> +#include "intel_reset.h"
> +
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/igt_reset.h"
> +#include "selftests/igt_spinner.h"
> +#include "selftests/igt_wedge_me.h"
> +#include "selftests/mock_context.h"
> +#include "selftests/mock_drm.h"
>   
>   static const struct wo_register {
>   	enum intel_platform platform;
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 503d548a55f7..e9fadcb4d592 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -25,8 +25,9 @@
>    *
>    */
>   
> +#include "gt/intel_engine.h"
> +
>   #include "i915_drv.h"
> -#include "intel_ringbuffer.h"
>   
>   /**
>    * DOC: batch buffer command parser
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5823ffb17821..3f039758b152 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -32,7 +32,8 @@
>   #include <drm/drm_debugfs.h>
>   #include <drm/drm_fourcc.h>
>   
> -#include "i915_reset.h"
> +#include "gt/intel_reset.h"
> +
>   #include "intel_dp.h"
>   #include "intel_drv.h"
>   #include "intel_fbc.h"
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 1ad88e6d7c04..98b997526daa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -47,10 +47,12 @@
>   #include <drm/drm_probe_helper.h>
>   #include <drm/i915_drm.h>
>   
> +#include "gt/intel_workarounds.h"
> +#include "gt/intel_reset.h"
> +
>   #include "i915_drv.h"
>   #include "i915_pmu.h"
>   #include "i915_query.h"
> -#include "i915_reset.h"
>   #include "i915_trace.h"
>   #include "i915_vgpu.h"
>   #include "intel_audio.h"
> @@ -62,7 +64,6 @@
>   #include "intel_pm.h"
>   #include "intel_sprite.h"
>   #include "intel_uc.h"
> -#include "intel_workarounds.h"
>   
>   static struct drm_driver driver;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7b5da9eddc1c..fad5306f07da 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -62,18 +62,19 @@
>   #include "i915_reg.h"
>   #include "i915_utils.h"
>   
> +#include "gt/intel_lrc.h"
> +#include "gt/intel_engine.h"
> +#include "gt/intel_workarounds.h"
> +
>   #include "intel_bios.h"
>   #include "intel_device_info.h"
>   #include "intel_display.h"
>   #include "intel_dpll_mgr.h"
>   #include "intel_frontbuffer.h"
> -#include "intel_lrc.h"
>   #include "intel_opregion.h"
> -#include "intel_ringbuffer.h"
>   #include "intel_uc.h"
>   #include "intel_uncore.h"
>   #include "intel_wopcm.h"
> -#include "intel_workarounds.h"
>   
>   #include "i915_gem.h"
>   #include "i915_gem_context.h"
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a5412323fee1..9554960977a3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -39,19 +39,20 @@
>   #include <linux/dma-buf.h>
>   #include <linux/mman.h>
>   
> +#include "gt/intel_mocs.h"
> +#include "gt/intel_reset.h"
> +#include "gt/intel_workarounds.h"
> +
>   #include "i915_drv.h"
>   #include "i915_gem_clflush.h"
>   #include "i915_gemfs.h"
>   #include "i915_globals.h"
> -#include "i915_reset.h"
>   #include "i915_trace.h"
>   #include "i915_vgpu.h"
>   
>   #include "intel_drv.h"
>   #include "intel_frontbuffer.h"
> -#include "intel_mocs.h"
>   #include "intel_pm.h"
> -#include "intel_workarounds.h"
>   
>   static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c02a30612df9..37dff694456c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,13 +86,16 @@
>    */
>   
>   #include <linux/log2.h>
> +
>   #include <drm/i915_drm.h>
> +
> +#include "gt/intel_lrc_reg.h"
> +#include "gt/intel_workarounds.h"
> +
>   #include "i915_drv.h"
>   #include "i915_globals.h"
>   #include "i915_trace.h"
>   #include "i915_user_extensions.h"
> -#include "intel_lrc_reg.h"
> -#include "intel_workarounds.h"
>   
>   #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
>   #define I915_CONTEXT_PARAM_VM 0x9
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 23dcb01bfd82..cec278ab04e2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -27,9 +27,10 @@
>   
>   #include "i915_gem_context_types.h"
>   
> +#include "gt/intel_context.h"
> +
>   #include "i915_gem.h"
>   #include "i915_scheduler.h"
> -#include "intel_context.h"
>   #include "intel_device_info.h"
>   
>   struct drm_device;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index e2ec58b10fb2..d282a6ab3b9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -17,8 +17,9 @@
>   #include <linux/rcupdate.h>
>   #include <linux/types.h>
>   
> +#include "gt/intel_context_types.h"
> +
>   #include "i915_scheduler.h"
> -#include "intel_context_types.h"
>   
>   struct pid;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8f460cc4cc1f..aab778728ea2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -37,7 +37,6 @@
>   
>   #include "i915_drv.h"
>   #include "i915_vgpu.h"
> -#include "i915_reset.h"
>   #include "i915_trace.h"
>   #include "intel_drv.h"
>   #include "intel_frontbuffer.h"
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index f597f35b109b..c8d96e91f3dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -38,8 +38,8 @@
>   #include <linux/mm.h>
>   #include <linux/pagevec.h>
>   
> +#include "gt/intel_reset.h"
>   #include "i915_request.h"
> -#include "i915_reset.h"
>   #include "i915_selftest.h"
>   #include "i915_timeline.h"
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 5dc761e85d9d..b419d0f59275 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -13,8 +13,9 @@
>   
>   #include <drm/drm_mm.h>
>   
> +#include "gt/intel_engine.h"
> +
>   #include "intel_device_info.h"
> -#include "intel_ringbuffer.h"
>   #include "intel_uc_fw.h"
>   
>   #include "i915_gem.h"
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 56da457bed21..a87f790335c1 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -195,6 +195,8 @@
>   #include <linux/sizes.h>
>   #include <linux/uuid.h>
>   
> +#include "gt/intel_lrc_reg.h"
> +
>   #include "i915_drv.h"
>   #include "i915_oa_hsw.h"
>   #include "i915_oa_bdw.h"
> @@ -210,7 +212,6 @@
>   #include "i915_oa_cflgt3.h"
>   #include "i915_oa_cnl.h"
>   #include "i915_oa_icl.h"
> -#include "intel_lrc_reg.h"
>   
>   /* HW requires this to be a power of two, between 128k and 16M, though driver
>    * is currently generally designed assuming the largest 16M size is used such
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 46a52da3db29..35e502481f29 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -6,8 +6,10 @@
>   
>   #include <linux/irq.h>
>   #include <linux/pm_runtime.h>
> +
> +#include "gt/intel_engine.h"
> +
>   #include "i915_pmu.h"
> -#include "intel_ringbuffer.h"
>   #include "i915_drv.h"
>   
>   /* Frequency for the sampling timer for events which need it. */
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index e0efc334463b..74ae698c1f95 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -32,7 +32,6 @@
>   #include "i915_active.h"
>   #include "i915_drv.h"
>   #include "i915_globals.h"
> -#include "i915_reset.h"
>   #include "intel_pm.h"
>   
>   struct execute_cb {
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index f1af3916a808..166a457884b2 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -9,8 +9,8 @@
>   
>   #include <linux/list.h>
>   
> +#include "gt/intel_engine_types.h"
>   #include "i915_priolist_types.h"
> -#include "intel_engine_types.h"
>   
>   struct drm_i915_private;
>   struct i915_request;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 12893304c8f8..b5286f3d8146 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -8,9 +8,10 @@
>   
>   #include <drm/drm_drv.h>
>   
> +#include "gt/intel_engine.h"
> +
>   #include "i915_drv.h"
>   #include "intel_drv.h"
> -#include "intel_ringbuffer.h"
>   
>   #undef TRACE_SYSTEM
>   #define TRACE_SYSTEM i915
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 36726392e737..d4d308b6d1d8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -22,11 +22,12 @@
>    *
>    */
>   
> +#include "gt/intel_engine.h"
> +
>   #include "i915_vma.h"
>   
>   #include "i915_drv.h"
>   #include "i915_globals.h"
> -#include "intel_ringbuffer.h"
>   #include "intel_frontbuffer.h"
>   
>   #include <drm/drm_gem.h>
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 3045e0dee2a1..aa89a9adeffb 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -27,9 +27,11 @@
>   
>   #include <uapi/drm/i915_drm.h>
>   
> -#include "intel_engine_types.h"
> +#include "gt/intel_engine_types.h"
> +#include "gt/intel_context_types.h"
> +#include "gt/intel_sseu.h"
> +
>   #include "intel_display.h"
> -#include "intel_sseu.h"
>   
>   struct drm_printer;
>   struct drm_i915_private;
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 3bd40a4a6739..24e70d46b872 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -46,7 +46,6 @@
>   
>   #include "i915_drv.h"
>   #include "i915_gem_clflush.h"
> -#include "i915_reset.h"
>   #include "i915_trace.h"
>   #include "intel_atomic_plane.h"
>   #include "intel_color.h"
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 37f60cb8e9e1..1b6d6403ee92 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -25,8 +25,9 @@
>   #include <linux/circ_buf.h>
>   #include <trace/events/dma_fence.h>
>   
> +#include "gt/intel_lrc_reg.h"
> +
>   #include "intel_guc_submission.h"
> -#include "intel_lrc_reg.h"
>   #include "i915_drv.h"
>   
>   #define GUC_PREEMPT_FINISHED		0x1
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
> index aa5e6749c925..7d823a513b9c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.h
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.h
> @@ -27,9 +27,10 @@
>   
>   #include <linux/spinlock.h>
>   
> +#include "gt/intel_engine_types.h"
> +
>   #include "i915_gem.h"
>   #include "i915_selftest.h"
> -#include "intel_engine_types.h"
>   
>   struct drm_i915_private;
>   
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 25b80ffe71ad..13f823ff8083 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -22,11 +22,11 @@
>    *
>    */
>   
> +#include "gt/intel_reset.h"
>   #include "intel_uc.h"
>   #include "intel_guc_submission.h"
>   #include "intel_guc.h"
>   #include "i915_drv.h"
> -#include "i915_reset.h"
>   
>   static void guc_free_load_err_log(struct intel_guc *guc);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index e1cb22f03e8e..6f52ca881173 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -24,8 +24,9 @@
>   
>   #include <linux/prime_numbers.h>
>   
> -#include "../i915_reset.h"
> -#include "../i915_selftest.h"
> +#include "gt/intel_reset.h"
> +#include "i915_selftest.h"
> +
>   #include "i915_random.h"
>   #include "igt_flush_test.h"
>   #include "igt_live_test.h"
> diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
> index 208a966da8ca..4f31b137c428 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_reset.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
> @@ -6,8 +6,9 @@
>   
>   #include "igt_reset.h"
>   
> +#include "gt/intel_engine.h"
> +
>   #include "../i915_drv.h"
> -#include "../intel_ringbuffer.h"
>   
>   void igt_global_reset_lock(struct drm_i915_private *i915)
>   {
> diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
> index 391777c76dc7..d312e7cdab68 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
> +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
> @@ -9,9 +9,10 @@
>   
>   #include "../i915_selftest.h"
>   
> +#include "gt/intel_engine.h"
> +
>   #include "../i915_drv.h"
>   #include "../i915_request.h"
> -#include "../intel_ringbuffer.h"
>   #include "../i915_gem_context.h"
>   
>   struct igt_spinner {
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 60bbf8b4df40..f444ee5add27 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -25,7 +25,8 @@
>   #include <linux/pm_domain.h>
>   #include <linux/pm_runtime.h>
>   
> -#include "mock_engine.h"
> +#include "gt/mock_engine.h"
> +
>   #include "mock_context.h"
>   #include "mock_request.h"
>   #include "mock_gem_device.h"
> diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
> index d1a7c9608712..f739ba63057f 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_request.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_request.c
> @@ -22,7 +22,8 @@
>    *
>    */
>   
> -#include "mock_engine.h"
> +#include "gt/mock_engine.h"
> +
>   #include "mock_request.h"
>   
>   struct i915_request *
> 

* Re: [PATCH 08/32] drm/i915: Introduce struct intel_wakeref
  2019-04-17  7:56 ` [PATCH 08/32] drm/i915: Introduce struct intel_wakeref Chris Wilson
@ 2019-04-17  9:45   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> For controlling runtime pm of the GT and engines, we would like to have
> a callback to do extra work the first time we wake up and the last time
> we drop the wakeref. This first/last access needs serialisation and so
> we encompass a mutex with the regular intel_wakeref_t tracker.
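> 
> For illustration, a minimal usage sketch might look like this (the
> engine_wake/engine_park callbacks are hypothetical placeholders, not
> part of this patch):
> 
> 	static int engine_wake(struct intel_wakeref *wf)
> 	{
> 		/* first-acquire work, serialised by wf->mutex */
> 		return 0;
> 	}
> 
> 	static int engine_park(struct intel_wakeref *wf)
> 	{
> 		/* final-release work, serialised by wf->mutex */
> 		return 0;
> 	}
> 
> 	intel_wakeref_init(&wf);
> 	err = intel_wakeref_get(i915, &wf, engine_wake); /* first user */
> 	...
> 	err = intel_wakeref_put(i915, &wf, engine_park); /* last user */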
> 
> v2: Drop the _once naming and report the errors.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile             |   1 +
>   drivers/gpu/drm/i915/Makefile.header-test |   3 +-
>   drivers/gpu/drm/i915/i915_drv.h           |   3 +-
>   drivers/gpu/drm/i915/intel_wakeref.c      |  61 ++++++++++
>   drivers/gpu/drm/i915/intel_wakeref.h      | 133 ++++++++++++++++++++++
>   5 files changed, 198 insertions(+), 3 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_wakeref.c
>   create mode 100644 drivers/gpu/drm/i915/intel_wakeref.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 40130cf5c003..233bad5e361f 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -50,6 +50,7 @@ i915-y += i915_drv.o \
>   	  intel_device_info.o \
>   	  intel_pm.o \
>   	  intel_runtime_pm.o \
> +	  intel_wakeref.o \
>   	  intel_uncore.o
>   
>   # core library code
> diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
> index 96a5d90629ec..e6b3e7588860 100644
> --- a/drivers/gpu/drm/i915/Makefile.header-test
> +++ b/drivers/gpu/drm/i915/Makefile.header-test
> @@ -31,7 +31,8 @@ header_test := \
>   	intel_psr.h \
>   	intel_sdvo.h \
>   	intel_sprite.h \
> -	intel_tv.h
> +	intel_tv.h \
> +	intel_wakeref.h
>   
>   quiet_cmd_header_test = HDRTEST $@
>         cmd_header_test = echo "\#include \"$(<F)\"" > $@
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fad5306f07da..62a7e91acd7f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -74,6 +74,7 @@
>   #include "intel_opregion.h"
>   #include "intel_uc.h"
>   #include "intel_uncore.h"
> +#include "intel_wakeref.h"
>   #include "intel_wopcm.h"
>   
>   #include "i915_gem.h"
> @@ -134,8 +135,6 @@ bool i915_error_injected(void);
>   	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
>   		      fmt, ##__VA_ARGS__)
>   
> -typedef depot_stack_handle_t intel_wakeref_t;
> -
>   enum hpd_pin {
>   	HPD_NONE = 0,
>   	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> new file mode 100644
> index 000000000000..1f94bc4ff9e4
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -0,0 +1,61 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "intel_drv.h"
> +#include "intel_wakeref.h"
> +
> +int __intel_wakeref_get_first(struct drm_i915_private *i915,
> +			      struct intel_wakeref *wf,
> +			      int (*fn)(struct intel_wakeref *wf))
> +{
> +	/*
> +	 * Treat get/put as different subclasses, as we may need to run
> +	 * the put callback from under the shrinker and do not want to
> +	 * cross-contaminate that callback with any extra work performed
> +	 * upon acquiring the wakeref.
> +	 */
> +	mutex_lock_nested(&wf->mutex, SINGLE_DEPTH_NESTING);
> +	if (!atomic_read(&wf->count)) {
> +		int err;
> +
> +		wf->wakeref = intel_runtime_pm_get(i915);
> +
> +		err = fn(wf);
> +		if (unlikely(err)) {
> +			intel_runtime_pm_put(i915, wf->wakeref);
> +			mutex_unlock(&wf->mutex);
> +			return err;
> +		}
> +
> +		smp_mb__before_atomic(); /* release wf->count */
> +	}
> +	atomic_inc(&wf->count);
> +	mutex_unlock(&wf->mutex);
> +
> +	return 0;
> +}
> +
> +int __intel_wakeref_put_last(struct drm_i915_private *i915,
> +			     struct intel_wakeref *wf,
> +			     int (*fn)(struct intel_wakeref *wf))
> +{
> +	int err;
> +
> +	err = fn(wf);
> +	if (likely(!err))
> +		intel_runtime_pm_put(i915, wf->wakeref);
> +	else
> +		atomic_inc(&wf->count);
> +	mutex_unlock(&wf->mutex);
> +
> +	return err;
> +}
> +
> +void __intel_wakeref_init(struct intel_wakeref *wf, struct lock_class_key *key)
> +{
> +	__mutex_init(&wf->mutex, "wakeref", key);
> +	atomic_set(&wf->count, 0);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
> new file mode 100644
> index 000000000000..a979d638344b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -0,0 +1,133 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_WAKEREF_H
> +#define INTEL_WAKEREF_H
> +
> +#include <linux/atomic.h>
> +#include <linux/mutex.h>
> +#include <linux/stackdepot.h>
> +
> +struct drm_i915_private;
> +
> +typedef depot_stack_handle_t intel_wakeref_t;
> +
> +struct intel_wakeref {
> +	atomic_t count;
> +	struct mutex mutex;
> +	intel_wakeref_t wakeref;
> +};
> +
> +void __intel_wakeref_init(struct intel_wakeref *wf,
> +			  struct lock_class_key *key);
> +#define intel_wakeref_init(wf) do {					\
> +	static struct lock_class_key __key;				\
> +									\
> +	__intel_wakeref_init((wf), &__key);				\
> +} while (0)
> +
> +int __intel_wakeref_get_first(struct drm_i915_private *i915,
> +			      struct intel_wakeref *wf,
> +			      int (*fn)(struct intel_wakeref *wf));
> +int __intel_wakeref_put_last(struct drm_i915_private *i915,
> +			     struct intel_wakeref *wf,
> +			     int (*fn)(struct intel_wakeref *wf));
> +
> +/**
> + * intel_wakeref_get: Acquire the wakeref
> + * @i915: the drm_i915_private device
> + * @wf: the wakeref
> + * @fn: callback for acquiring the wakeref, called only on first acquire.
> + *
> + * Acquire a hold on the wakeref. The first user to do so, will acquire
> + * the runtime pm wakeref and then call the @fn underneath the wakeref
> + * mutex.
> + *
> + * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
> + * will be released and the acquisition unwound, and an error reported.
> + *
> + * Returns: 0 if the wakeref was acquired successfully, or a negative error
> + * code otherwise.
> + */
> +static inline int
> +intel_wakeref_get(struct drm_i915_private *i915,
> +		  struct intel_wakeref *wf,
> +		  int (*fn)(struct intel_wakeref *wf))
> +{
> +	if (unlikely(!atomic_inc_not_zero(&wf->count)))
> +		return __intel_wakeref_get_first(i915, wf, fn);
> +
> +	return 0;
> +}
> +
> +/**
> + * intel_wakeref_put: Release the wakeref
> + * @i915: the drm_i915_private device
> + * @wf: the wakeref
> + * @fn: callback for releasing the wakeref, called only on final release.
> + *
> + * Release our hold on the wakeref. When there are no more users,
> + * the runtime pm wakeref will be released after the @fn callback is called
> + * underneath the wakeref mutex.
> + *
> + * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
> + * is retained and an error reported.
> + *
> + * Returns: 0 if the wakeref was released successfully, or a negative error
> + * code otherwise.
> + */
> +static inline int
> +intel_wakeref_put(struct drm_i915_private *i915,
> +		  struct intel_wakeref *wf,
> +		  int (*fn)(struct intel_wakeref *wf))
> +{
> +	if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex))
> +		return __intel_wakeref_put_last(i915, wf, fn);
> +
> +	return 0;
> +}
> +
> +/**
> + * intel_wakeref_lock: Lock the wakeref (mutex)
> + * @wf: the wakeref
> + *
> + * Locks the wakeref to prevent it being acquired or released. New users
> + * can still adjust the counter, but the wakeref itself (and callback)
> + * cannot be acquired or released.
> + */
> +static inline void
> +intel_wakeref_lock(struct intel_wakeref *wf)
> +	__acquires(wf->mutex)
> +{
> +	mutex_lock(&wf->mutex);
> +}
> +
> +/**
> + * intel_wakeref_unlock: Unlock the wakeref
> + * @wf: the wakeref
> + *
> + * Releases a previously acquired intel_wakeref_lock().
> + */
> +static inline void
> +intel_wakeref_unlock(struct intel_wakeref *wf)
> +	__releases(wf->mutex)
> +{
> +	mutex_unlock(&wf->mutex);
> +}
> +
> +/**
> + * intel_wakeref_active: Query whether the wakeref is currently held
> + * @wf: the wakeref
> + *
> + * Returns: true if the wakeref is currently held.
> + */
> +static inline bool
> +intel_wakeref_active(struct intel_wakeref *wf)
> +{
> +	return atomic_read(&wf->count);
> +}
> +
> +#endif /* INTEL_WAKEREF_H */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 21/32] drm/i915: Remove intel_context.active_link
  2019-04-17  7:56 ` [PATCH 21/32] drm/i915: Remove intel_context.active_link Chris Wilson
@ 2019-04-17  9:47   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> We no longer need to track the active intel_contexts within each engine,
> allowing us to drop a tricky mutex_lock from inside unpin (which may
> occur inside fs_reclaim).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c           | 11 +----------
>   drivers/gpu/drm/i915/gt/intel_context_types.h     |  1 -
>   drivers/gpu/drm/i915/i915_debugfs.c               | 11 +++++++++--
>   drivers/gpu/drm/i915/i915_gem_context.c           |  2 --
>   drivers/gpu/drm/i915/i915_gem_context_types.h     |  1 -
>   drivers/gpu/drm/i915/selftests/i915_gem_context.c |  1 -
>   drivers/gpu/drm/i915/selftests/mock_context.c     |  1 -
>   7 files changed, 10 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 5e506e648454..1f1761fc6597 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -49,7 +49,6 @@ int __intel_context_do_pin(struct intel_context *ce)
>   		return -EINTR;
>   
>   	if (likely(!atomic_read(&ce->pin_count))) {
> -		struct i915_gem_context *ctx = ce->gem_context;
>   		intel_wakeref_t wakeref;
>   
>   		err = 0;
> @@ -58,11 +57,7 @@ int __intel_context_do_pin(struct intel_context *ce)
>   		if (err)
>   			goto err;
>   
> -		i915_gem_context_get(ctx);
> -
> -		mutex_lock(&ctx->mutex);
> -		list_add(&ce->active_link, &ctx->active_engines);
> -		mutex_unlock(&ctx->mutex);
> +		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
>   
>   		intel_context_get(ce);
>   		smp_mb__before_atomic(); /* flush pin before it is visible */
> @@ -91,10 +86,6 @@ void intel_context_unpin(struct intel_context *ce)
>   	if (likely(atomic_dec_and_test(&ce->pin_count))) {
>   		ce->ops->unpin(ce);
>   
> -		mutex_lock(&ce->gem_context->mutex);
> -		list_del(&ce->active_link);
> -		mutex_unlock(&ce->gem_context->mutex);
> -
>   		i915_gem_context_put(ce->gem_context);
>   		intel_context_put(ce);
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 3579c2708321..d5a7dbd0daee 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -38,7 +38,6 @@ struct intel_context {
>   	struct intel_engine_cs *engine;
>   	struct intel_engine_cs *active;
>   
> -	struct list_head active_link;
>   	struct list_head signal_link;
>   	struct list_head signals;
>   
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 00d3ff746eb1..466becbb99c6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -34,6 +34,7 @@
>   
>   #include "gt/intel_reset.h"
>   
> +#include "i915_gem_context.h"
>   #include "intel_dp.h"
>   #include "intel_drv.h"
>   #include "intel_fbc.h"
> @@ -396,14 +397,17 @@ static void print_context_stats(struct seq_file *m,
>   	struct i915_gem_context *ctx;
>   
>   	list_for_each_entry(ctx, &i915->contexts.list, link) {
> +		struct i915_gem_engines_iter it;
>   		struct intel_context *ce;
>   
> -		list_for_each_entry(ce, &ctx->active_engines, active_link) {
> +		for_each_gem_engine(ce,
> +				    i915_gem_context_lock_engines(ctx), it) {
>   			if (ce->state)
>   				per_file_stats(0, ce->state->obj, &kstats);
>   			if (ce->ring)
>   				per_file_stats(0, ce->ring->vma->obj, &kstats);
>   		}
> +		i915_gem_context_unlock_engines(ctx);
>   
>   		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
>   			struct file_stats stats = { .vm = &ctx->ppgtt->vm, };
> @@ -1893,6 +1897,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   		return ret;
>   
>   	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> +		struct i915_gem_engines_iter it;
>   		struct intel_context *ce;
>   
>   		seq_puts(m, "HW context ");
> @@ -1917,7 +1922,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   		seq_putc(m, ctx->remap_slice ? 'R' : 'r');
>   		seq_putc(m, '\n');
>   
> -		list_for_each_entry(ce, &ctx->active_engines, active_link) {
> +		for_each_gem_engine(ce,
> +				    i915_gem_context_lock_engines(ctx), it) {
>   			seq_printf(m, "%s: ", ce->engine->name);
>   			if (ce->state)
>   				describe_obj(m, ce->state->obj);
> @@ -1925,6 +1931,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   				describe_ctx_ring(m, ce->ring);
>   			seq_putc(m, '\n');
>   		}
> +		i915_gem_context_unlock_engines(ctx);
>   
>   		seq_putc(m, '\n');
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index a1e1874742a3..5b9feeb8d006 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -289,7 +289,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   {
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
> -	GEM_BUG_ON(!list_empty(&ctx->active_engines));
>   
>   	release_hw_id(ctx);
>   	i915_ppgtt_put(ctx->ppgtt);
> @@ -416,7 +415,6 @@ __create_context(struct drm_i915_private *dev_priv)
>   	list_add_tail(&ctx->link, &dev_priv->contexts.list);
>   	ctx->i915 = dev_priv;
>   	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
> -	INIT_LIST_HEAD(&ctx->active_engines);
>   	mutex_init(&ctx->mutex);
>   
>   	mutex_init(&ctx->engines_mutex);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index 5f84618cf7db..d5cb4f121aad 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -165,7 +165,6 @@ struct i915_gem_context {
>   	atomic_t hw_id_pin_count;
>   	struct list_head hw_id_link;
>   
> -	struct list_head active_engines;
>   	struct mutex mutex;
>   
>   	struct i915_sched_attr sched;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 7fd224a4ca4c..deedd1898fe5 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1675,7 +1675,6 @@ static int mock_context_barrier(void *arg)
>   		goto out;
>   	}
>   	i915_request_add(rq);
> -	GEM_BUG_ON(list_empty(&ctx->active_engines));
>   
>   	counter = 0;
>   	context_barrier_inject_fault = BIT(RCS0);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 71c750693585..10e67c931ed1 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -50,7 +50,6 @@ mock_context(struct drm_i915_private *i915,
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>   	INIT_LIST_HEAD(&ctx->handles_list);
>   	INIT_LIST_HEAD(&ctx->hw_id_link);
> -	INIT_LIST_HEAD(&ctx->active_engines);
>   	mutex_init(&ctx->mutex);
>   
>   	ret = i915_gem_context_pin_hw_id(ctx);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 25/32] drm/i915: Allow a context to define its set of engines
  2019-04-17  7:56 ` [PATCH 25/32] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2019-04-17  9:50   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, which may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
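> 
> For illustration, populating a two-slot engine map might look like this
> (a sketch using igt-style helpers such as gem_context_set_param() and
> to_user_pointer(), which are assumptions here, not part of this patch):
> 
> 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
> 		.engines = {
> 			{ .engine_class = I915_ENGINE_CLASS_RENDER,
> 			  .engine_instance = 0 },
> 			{ .engine_class = I915_ENGINE_CLASS_VIDEO,
> 			  .engine_instance = 0 },
> 		},
> 	};
> 	struct drm_i915_gem_context_param p = {
> 		.ctx_id = ctx_id,
> 		.param = I915_CONTEXT_PARAM_ENGINES,
> 		.size = sizeof(engines),
> 		.value = to_user_pointer(&engines),
> 	};
> 
> 	gem_context_set_param(fd, &p); /* execbuf index 0 now means rcs0 */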
> 
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. Its use will be revealed in the next patch.
> 
> v2: Fixup freeing of local on success of get_engines()
> v3: Allow empty engines[]
> v4: s/nengine/num_engines/
> v5: Replace 64 limit on num_engines with a note that execbuf is
> currently limited to only using the first 64 engines.
> v6: Actually use the engines_mutex to guard the ctx->engines.
> 
> Testcase: igt/gem_ctx_engines
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       | 219 +++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context.h       |  18 ++
>   drivers/gpu/drm/i915/i915_gem_context_types.h |   1 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   5 +-
>   drivers/gpu/drm/i915/i915_utils.h             |  36 +++
>   include/uapi/drm/i915_drm.h                   |  31 +++
>   6 files changed, 303 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 217d4fe0349d..b4b7a2eee1c9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -90,7 +90,6 @@
>   #include <drm/i915_drm.h>
>   
>   #include "gt/intel_lrc_reg.h"
> -#include "gt/intel_workarounds.h"
>   
>   #include "i915_drv.h"
>   #include "i915_globals.h"
> @@ -143,13 +142,17 @@ static void lut_close(struct i915_gem_context *ctx)
>   static struct intel_context *
>   lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
>   {
> -	struct intel_engine_cs *engine;
> +	if (!i915_gem_context_user_engines(ctx)) {
> +		struct intel_engine_cs *engine;
>   
> -	engine = intel_engine_lookup_user(ctx->i915, class, instance);
> -	if (!engine)
> -		return ERR_PTR(-EINVAL);
> +		engine = intel_engine_lookup_user(ctx->i915, class, instance);
> +		if (!engine)
> +			return ERR_PTR(-EINVAL);
> +
> +		instance = engine->id;
> +	}
>   
> -	return i915_gem_context_get_engine(ctx, engine->id);
> +	return i915_gem_context_get_engine(ctx, instance);
>   }
>   
>   static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
> @@ -257,6 +260,17 @@ static void free_engines(struct i915_gem_engines *e)
>   	__free_engines(e, e->num_engines);
>   }
>   
> +static void free_engines_rcu(struct work_struct *wrk)
> +{
> +	struct i915_gem_engines *e =
> +		container_of(wrk, struct i915_gem_engines, rcu.work);
> +	struct drm_i915_private *i915 = e->i915;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	free_engines(e);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +}
> +
>   static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
>   {
>   	struct intel_engine_cs *engine;
> @@ -1382,6 +1396,191 @@ static int set_sseu(struct i915_gem_context *ctx,
>   	return ret;
>   }
>   
> +struct set_engines {
> +	struct i915_gem_context *ctx;
> +	struct i915_gem_engines *engines;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};
> +
> +static int
> +set_engines(struct i915_gem_context *ctx,
> +	    const struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user =
> +		u64_to_user_ptr(args->value);
> +	struct set_engines set = { .ctx = ctx };
> +	unsigned int num_engines, n;
> +	u64 extensions;
> +	int err;
> +
> +	if (!args->size) { /* switch back to legacy user_ring_map */
> +		if (!i915_gem_context_user_engines(ctx))
> +			return 0;
> +
> +		set.engines = default_engines(ctx);
> +		if (IS_ERR(set.engines))
> +			return PTR_ERR(set.engines);
> +
> +		goto replace;
> +	}
> +
> +	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
> +	if (args->size < sizeof(*user) ||
> +	    !IS_ALIGNED(args->size, sizeof(*user->engines))) {
> +		DRM_DEBUG("Invalid size for engine array: %d\n",
> +			  args->size);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * Note that I915_EXEC_RING_MASK limits execbuf to only using the
> +	 * first 64 engines defined here.
> +	 */
> +	num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
> +
> +	set.engines = kmalloc(struct_size(set.engines, engines, num_engines),
> +			      GFP_KERNEL);
> +	if (!set.engines)
> +		return -ENOMEM;
> +
> +	set.engines->i915 = ctx->i915;
> +	for (n = 0; n < num_engines; n++) {
> +		struct i915_engine_class_instance ci;
> +		struct intel_engine_cs *engine;
> +
> +		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
> +			__free_engines(set.engines, n);
> +			return -EFAULT;
> +		}
> +
> +		if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
> +		    ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> +			set.engines->engines[n] = NULL;
> +			continue;
> +		}
> +
> +		engine = intel_engine_lookup_user(ctx->i915,
> +						  ci.engine_class,
> +						  ci.engine_instance);
> +		if (!engine) {
> +			DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n",
> +				  n, ci.engine_class, ci.engine_instance);
> +			__free_engines(set.engines, n);
> +			return -ENOENT;
> +		}
> +
> +		set.engines->engines[n] = intel_context_create(ctx, engine);
> +		if (!set.engines->engines[n]) {
> +			__free_engines(set.engines, n);
> +			return -ENOMEM;
> +		}
> +	}
> +	set.engines->num_engines = num_engines;
> +
> +	err = -EFAULT;
> +	if (!get_user(extensions, &user->extensions))
> +		err = i915_user_extensions(u64_to_user_ptr(extensions),
> +					   set_engines__extensions,
> +					   ARRAY_SIZE(set_engines__extensions),
> +					   &set);
> +	if (err) {
> +		free_engines(set.engines);
> +		return err;
> +	}
> +
> +replace:
> +	mutex_lock(&ctx->engines_mutex);
> +	if (args->size)
> +		i915_gem_context_set_user_engines(ctx);
> +	else
> +		i915_gem_context_clear_user_engines(ctx);
> +	rcu_swap_protected(ctx->engines, set.engines, 1);
> +	mutex_unlock(&ctx->engines_mutex);
> +
> +	INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
> +	queue_rcu_work(system_wq, &set.engines->rcu);
> +
> +	return 0;
> +}
> +
> +static int
> +get_engines(struct i915_gem_context *ctx,
> +	    struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user;
> +	struct i915_gem_engines *e;
> +	size_t n, count, size;
> +	int err = 0;
> +
> +	err = mutex_lock_interruptible(&ctx->engines_mutex);
> +	if (err)
> +		return err;
> +
> +	if (!i915_gem_context_user_engines(ctx)) {
> +		args->size = 0;
> +		goto unlock;
> +	}
> +
> +	e = i915_gem_context_engines(ctx);
> +	count = e->num_engines;
> +
> +	/* Be paranoid in case we have an impedance mismatch */
> +	if (!check_struct_size(user, engines, count, &size)) {
> +		err = -EINVAL;
> +		goto unlock;
> +	}
> +	if (overflows_type(size, args->size)) {
> +		err = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	if (!args->size) {
> +		args->size = size;
> +		goto unlock;
> +	}
> +
> +	if (args->size < size) {
> +		err = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	user = u64_to_user_ptr(args->value);
> +	if (!access_ok(user, size)) {
> +		err = -EFAULT;
> +		goto unlock;
> +	}
> +
> +	if (put_user(0, &user->extensions)) {
> +		err = -EFAULT;
> +		goto unlock;
> +	}
> +
> +	for (n = 0; n < count; n++) {
> +		struct i915_engine_class_instance ci = {
> +			.engine_class = I915_ENGINE_CLASS_INVALID,
> +			.engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
> +		};
> +
> +		if (e->engines[n]) {
> +			ci.engine_class = e->engines[n]->engine->uabi_class;
> +			ci.engine_instance = e->engines[n]->engine->instance;
> +		}
> +
> +		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
> +			err = -EFAULT;
> +			goto unlock;
> +		}
> +	}
> +
> +	args->size = size;
> +
> +unlock:
> +	mutex_unlock(&ctx->engines_mutex);
> +	return err;
> +}
> +
>   static int ctx_setparam(struct drm_i915_file_private *fpriv,
>   			struct i915_gem_context *ctx,
>   			struct drm_i915_gem_context_param *args)
> @@ -1455,6 +1654,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
>   		ret = set_ppgtt(fpriv, ctx, args);
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = set_engines(ctx, args);
> +		break;
> +
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
> @@ -1685,6 +1888,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   		ret = get_ppgtt(file_priv, ctx, args);
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = get_engines(ctx, args);
> +		break;
> +
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 272e183ebc0c..9ad4a6362438 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -112,6 +112,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
>   	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
>   }
>   
> +static inline bool
> +i915_gem_context_user_engines(const struct i915_gem_context *ctx)
> +{
> +	return test_bit(CONTEXT_USER_ENGINES, &ctx->flags);
> +}
> +
> +static inline void
> +i915_gem_context_set_user_engines(struct i915_gem_context *ctx)
> +{
> +	set_bit(CONTEXT_USER_ENGINES, &ctx->flags);
> +}
> +
> +static inline void
> +i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
> +{
> +	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
> +}
> +
>   int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
>   static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index d5cb4f121aad..fb965ded2508 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -146,6 +146,7 @@ struct i915_gem_context {
>   #define CONTEXT_BANNED			0
>   #define CONTEXT_CLOSED			1
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
> +#define CONTEXT_USER_ENGINES		3
>   
>   	/**
>   	 * @hw_id: - unique identifier for the context
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 679f7c1561ba..d6c5220addd0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2165,7 +2165,10 @@ eb_select_engine(struct i915_execbuffer *eb,
>   	unsigned int idx;
>   	int err;
>   
> -	idx = eb_select_legacy_ring(eb, file, args);
> +	if (i915_gem_context_user_engines(eb->gem_context))
> +		idx = args->flags & I915_EXEC_RING_MASK;
> +	else
> +		idx = eb_select_legacy_ring(eb, file, args);
>   
>   	ce = i915_gem_context_get_engine(eb->gem_context, idx);
>   	if (IS_ERR(ce))
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 2dbe8933b50a..1436fe2fb5f8 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -25,6 +25,9 @@
>   #ifndef __I915_UTILS_H
>   #define __I915_UTILS_H
>   
> +#include <linux/kernel.h>
> +#include <linux/overflow.h>
> +
>   #undef WARN_ON
>   /* Many gcc seem to no see through this and fall over :( */
>   #if 0
> @@ -73,6 +76,39 @@
>   #define overflows_type(x, T) \
>   	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
>   
> +static inline bool
> +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
> +{
> +	size_t sz;
> +
> +	if (check_mul_overflow(count, arr, &sz))
> +		return false;
> +
> +	if (check_add_overflow(sz, base, &sz))
> +		return false;
> +
> +	*size = sz;
> +	return true;
> +}
> +
> +/**
> + * check_struct_size() - Calculate size of structure with trailing array.
> + * @p: Pointer to the structure.
> + * @member: Name of the array member.
> + * @n: Number of elements in the array.
> + * @sz: Total size of structure and array
> + *
> + * Calculates size of memory needed for structure @p followed by an
> + * array of @n @member elements, like struct_size() but reports
> + * whether it overflowed, and the resultant size in @sz
> + *
> + * Return: false if the calculation overflowed.
> + */
> +#define check_struct_size(p, member, n, sz) \
> +	likely(__check_struct_size(sizeof(*(p)), \
> +				   sizeof(*(p)->member) + __must_be_array((p)->member), \
> +				   n, sz))
> +
>   #define ptr_mask_bits(ptr, n) ({					\
>   	unsigned long __v = (unsigned long)(ptr);			\
>   	(typeof(ptr))(__v & -BIT(n));					\
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index d6ad4a15b2b9..8e1bb22926e4 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -136,6 +136,7 @@ enum drm_i915_gem_engine_class {
>   struct i915_engine_class_instance {
>   	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
>   	__u16 engine_instance;
> +#define I915_ENGINE_CLASS_INVALID_NONE -1
>   };
>   
>   /**
> @@ -1522,6 +1523,26 @@ struct drm_i915_gem_context_param {
>   	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
>   	 */
>   #define I915_CONTEXT_PARAM_VM		0x9
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. Slots 0...N are filled in using the specified (class, instance).
> + * Use
> + *	engine_class: I915_ENGINE_CLASS_INVALID,
> + *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
> + * to specify a gap in the array that can be filled in later, e.g. by a
> + * virtual engine used for load balancing.
> + *
> + * Setting the number of engines bound to the context to 0, by passing a zero
> + * sized argument, will revert back to default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES	0xa
>   /* Must be kept compact -- no holes and well documented */
>   
>   	__u64 value;
> @@ -1585,6 +1606,16 @@ struct drm_i915_gem_context_param_sseu {
>   	__u32 rsvd;
>   };
>   
> +struct i915_context_param_engines {
> +	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +	struct i915_engine_class_instance engines[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
> +	__u64 extensions; \
> +	struct i915_engine_class_instance engines[N__]; \
> +} __attribute__((packed)) name__
> +
>   struct drm_i915_gem_context_create_ext_setparam {
>   #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
>   	struct i915_user_extension base;
> 

Not 100% to my liking but only in details.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 27/32] drm/i915: Allow userspace to clone contexts on creation
  2019-04-17  7:56 ` [PATCH 27/32] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
@ 2019-04-17  9:50   ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17  9:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> A usecase arose out of handling context recovery in mesa, whereby they
> wish to recreate a context with fresh logical state but preserving all
> other details of the original. Currently, they create a new context and
> iterate over which bits they want to copy across, but it would be much more
> convenient if they were able to just pass in a target context to clone
> during creation. This essentially extends the setparam during creation
> to pull the details from a target context instead of the user supplied
> parameters.
> 
> The ideal here is that we don't expose control over anything more than
> can be obtained via CONTEXT_PARAM. That is userspace retains explicit
> control over all features, and this api is just convenience.
> 
> For example, you could replace
> 
> 	struct context_param p = { .param = CONTEXT_PARAM_VM };
> 
> 	param.ctx_id = old_id;
> 	gem_context_get_param(&p.param);
> 
> 	new_id = gem_context_create();
> 
> 	param.ctx_id = new_id;
> 	gem_context_set_param(&p.param);
> 
> 	gem_vm_destroy(param.value); /* drop the ref to VM_ID handle */
> 
> with
> 
> 	struct create_ext_param p = {
> 	  { .name = CONTEXT_CREATE_CLONE },
> 	  .clone_id = old_id,
> 	  .flags = CLONE_FLAGS_VM
> 	}
> 	new_id = gem_context_create_ext(&p);
> 
> and not have to worry about stray namespace pollution etc.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 206 ++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h             |  15 ++
>   2 files changed, 221 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index d6bea51050c0..ba7582d955d1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1682,8 +1682,214 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
>   	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
>   }
>   
> +static int clone_engines(struct i915_gem_context *dst,
> +			 struct i915_gem_context *src)
> +{
> +	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
> +	struct i915_gem_engines *clone;
> +	bool user_engines;
> +	unsigned long n;
> +
> +	clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
> +	if (!clone)
> +		goto err_unlock;
> +
> +	clone->i915 = dst->i915;
> +	for (n = 0; n < e->num_engines; n++) {
> +		if (!e->engines[n]) {
> +			clone->engines[n] = NULL;
> +			continue;
> +		}
> +
> +		clone->engines[n] =
> +			intel_context_create(dst, e->engines[n]->engine);
> +		if (!clone->engines[n]) {
> +			__free_engines(clone, n);
> +			goto err_unlock;
> +		}
> +	}
> +	clone->num_engines = n;
> +
> +	user_engines = i915_gem_context_user_engines(src);
> +	i915_gem_context_unlock_engines(src);
> +
> +	free_engines(dst->engines);
> +	RCU_INIT_POINTER(dst->engines, clone);
> +	if (user_engines)
> +		i915_gem_context_set_user_engines(dst);
> +	else
> +		i915_gem_context_clear_user_engines(dst);
> +	return 0;
> +
> +err_unlock:
> +	i915_gem_context_unlock_engines(src);
> +	return -ENOMEM;
> +}
> +
> +static int clone_flags(struct i915_gem_context *dst,
> +		       struct i915_gem_context *src)
> +{
> +	dst->user_flags = src->user_flags;
> +	return 0;
> +}
> +
> +static int clone_schedattr(struct i915_gem_context *dst,
> +			   struct i915_gem_context *src)
> +{
> +	dst->sched = src->sched;
> +	return 0;
> +}
> +
> +static int clone_sseu(struct i915_gem_context *dst,
> +		      struct i915_gem_context *src)
> +{
> +	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
> +	struct i915_gem_engines *clone;
> +	unsigned long n;
> +	int err;
> +
> +	clone = dst->engines; /* no locking required; sole access */
> +	if (e->num_engines != clone->num_engines) {
> +		err = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	for (n = 0; n < e->num_engines; n++) {
> +		struct intel_context *ce = e->engines[n];
> +
> +		if (clone->engines[n]->engine->class != ce->engine->class) {
> +			/* Must have compatible engine maps! */
> +			err = -EINVAL;
> +			goto unlock;
> +		}
> +
> +		/* serialises with set_sseu */
> +		err = intel_context_lock_pinned(ce);
> +		if (err)
> +			goto unlock;
> +
> +		clone->engines[n]->sseu = ce->sseu;
> +		intel_context_unlock_pinned(ce);
> +	}
> +
> +	err = 0;
> +unlock:
> +	i915_gem_context_unlock_engines(src);
> +	return err;
> +}
> +
> +static int clone_timeline(struct i915_gem_context *dst,
> +			  struct i915_gem_context *src)
> +{
> +	if (src->timeline) {
> +		GEM_BUG_ON(src->timeline == dst->timeline);
> +
> +		if (dst->timeline)
> +			i915_timeline_put(dst->timeline);
> +		dst->timeline = i915_timeline_get(src->timeline);
> +	}
> +
> +	return 0;
> +}
> +
> +static int clone_vm(struct i915_gem_context *dst,
> +		    struct i915_gem_context *src)
> +{
> +	struct i915_hw_ppgtt *ppgtt;
> +
> +	rcu_read_lock();
> +	do {
> +		ppgtt = READ_ONCE(src->ppgtt);
> +		if (!ppgtt)
> +			break;
> +
> +		if (!kref_get_unless_zero(&ppgtt->ref))
> +			continue;
> +
> +		/*
> +		 * This ppgtt may have been reallocated between
> +		 * the read and the kref, and reassigned to a third
> +		 * context. In order to avoid inadvertent sharing
> +		 * of this ppgtt with that third context (and not
> +		 * src), we have to confirm that we have the same
> +		 * ppgtt after passing through the strong memory
> +		 * barrier implied by a successful
> +		 * kref_get_unless_zero().
> +		 *
> +		 * Once we have acquired the current ppgtt of src,
> +		 * we no longer care if it is released from src, as
> +		 * it cannot be reallocated elsewhere.
> +		 */
> +
> +		if (ppgtt == READ_ONCE(src->ppgtt))
> +			break;
> +
> +		i915_ppgtt_put(ppgtt);
> +	} while (1);
> +	rcu_read_unlock();
> +
> +	if (ppgtt) {
> +		__assign_ppgtt(dst, ppgtt);
> +		i915_ppgtt_put(ppgtt);
> +	}
> +
> +	return 0;
> +}
> +
> +static int create_clone(struct i915_user_extension __user *ext, void *data)
> +{
> +	static int (* const fn[])(struct i915_gem_context *dst,
> +				  struct i915_gem_context *src) = {
> +#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
> +		MAP(ENGINES, clone_engines),
> +		MAP(FLAGS, clone_flags),
> +		MAP(SCHEDATTR, clone_schedattr),
> +		MAP(SSEU, clone_sseu),
> +		MAP(TIMELINE, clone_timeline),
> +		MAP(VM, clone_vm),
> +#undef MAP
> +	};
> +	struct drm_i915_gem_context_create_ext_clone local;
> +	const struct create_ext *arg = data;
> +	struct i915_gem_context *dst = arg->ctx;
> +	struct i915_gem_context *src;
> +	int err, bit;
> +
> +	if (copy_from_user(&local, ext, sizeof(local)))
> +		return -EFAULT;
> +
> +	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
> +		     I915_CONTEXT_CLONE_UNKNOWN);
> +
> +	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
> +		return -EINVAL;
> +
> +	if (local.rsvd)
> +		return -EINVAL;
> +
> +	rcu_read_lock();
> +	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
> +	rcu_read_unlock();
> +	if (!src)
> +		return -ENOENT;
> +
> +	GEM_BUG_ON(src == dst);
> +
> +	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
> +		if (!(local.flags & BIT(bit)))
> +			continue;
> +
> +		err = fn[bit](dst, src);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
>   static const i915_user_extension_fn create_extensions[] = {
>   	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
> +	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
>   };
>   
>   static bool client_is_banned(struct drm_i915_file_private *file_priv)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7aef672ab3c7..7694113362d4 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1623,6 +1623,21 @@ struct drm_i915_gem_context_create_ext_setparam {
>   	struct drm_i915_gem_context_param param;
>   };
>   
> +struct drm_i915_gem_context_create_ext_clone {
> +#define I915_CONTEXT_CREATE_EXT_CLONE 1
> +	struct i915_user_extension base;
> +	__u32 clone_id;
> +	__u32 flags;
> +#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
> +#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
> +#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
> +#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
> +#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
> +#define I915_CONTEXT_CLONE_VM		(1u << 5)
> +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> +	__u64 rsvd;
> +};
> +
>   struct drm_i915_gem_context_destroy {
>   	__u32 ctx_id;
>   	__u32 pad;
> 

Carry over for good measure:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 28/32] drm/i915: Load balancing across a virtual engine
  2019-04-17  7:56 ` [PATCH 28/32] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2019-04-17 11:26   ` Tvrtko Ursulin
  2019-04-17 13:51     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17 11:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Having allowed the user to define the set of engines that they want
> to use, we go one step further and allow them to bind those engines
> into a single virtual instance. Submitting a batch to the virtual engine
> will then forward it to any one of the set in a manner as best to
> distribute load.  The virtual engine has a single timeline across all
> engines (it operates as a single queue), so it is not able to concurrently
> run batches across multiple engines by itself; that is left up to the user
> to submit multiple concurrent batches to multiple queues. Multiple users
> will be load balanced across the system.
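> 
> As a sketch, assuming the virtual engine has been placed in slot 0 of
> the context's engine map, submission is then just the ordinary execbuf
> index (gem_execbuf() and to_user_pointer() are igt-style helpers,
> assumed for brevity):
> 
> 	struct drm_i915_gem_execbuffer2 eb = {
> 		.buffers_ptr = to_user_pointer(exec),
> 		.buffer_count = 1,
> 		.rsvd1 = ctx_id,
> 		.flags = 0, /* index 0 selects the virtual engine */
> 	};
> 
> 	gem_execbuf(fd, &eb); /* any idle sibling may claim the request */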
> 
> The mechanism used for load balancing in this patch is a late greedy
> balancer. When a request is ready for execution, it is added to each
> engine's queue, and when an engine is ready for its next request it
> claims it from the virtual engine. The first engine to do so, wins, i.e.
> the request is executed at the earliest opportunity (idle moment) in the
> system.
> 
> As not all HW is created equal, the user is still able to skip the
> virtual engine and execute the batch on a specific engine, all within the
> same queue. It will then be executed in order on the correct engine,
> with execution on other virtual engines being moved away due to the load
> detection.
> 
> A couple of areas for potential improvement left!
> 
> - The virtual engine always takes priority over equal-priority tasks.
> Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
> and hopefully the virtual and real engines are not then congested (i.e.
> all work is via virtual engines, or all work is to the real engine).
> 
> - We require the breadcrumb irq around every virtual engine request. For
> normal engines, we eliminate the need for the slow round trip via
> interrupt by using the submit fence and queueing in order. For virtual
> engines, we have to allow any job to transfer to a new ring, and cannot
> coalesce the submissions, so require the completion fence instead,
> forcing the persistent use of interrupts.
> 
> - We only drip feed single requests through each virtual engine and onto
> the physical engines, even if there was enough work to fill all ELSP,
> leaving small stalls with an idle CS event at the end of every request.
> Could we be greedy and fill both slots? Being lazy is virtuous for load
> distribution on less-than-full workloads though.
> 
> Other areas of improvement are more general, such as reducing lock
> contention, reducing dispatch overhead, looking at direct submission
> rather than bouncing around tasklets etc.
> 
> sseu: Lift the restriction to allow sseu to be reconfigured on virtual
> engines composed of RENDER_CLASS (rcs).
> 
> v2: macroize check_user_mbz()
> v3: Cancel virtual engines on wedging
> v4: Commence commenting
> v5: Replace 64b sibling_mask with a list of class:instance
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  |   6 +-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |   8 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 611 ++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_lrc.h          |   9 +
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 180 ++++++
>   drivers/gpu/drm/i915/i915_gem.h              |   5 +
>   drivers/gpu/drm/i915/i915_gem_context.c      | 118 +++-
>   drivers/gpu/drm/i915/i915_scheduler.c        |  18 +-
>   drivers/gpu/drm/i915/i915_timeline_types.h   |   1 +
>   include/uapi/drm/i915_drm.h                  |  39 ++
>   10 files changed, 978 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index e19f84b006cc..f900f0680647 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   				break;
>   		}
>   		list_add(&rq->signal_link, pos);
> -		if (pos == &ce->signals) /* catch transitions from empty list */
> +		if (pos == &ce->signals) { /* catch transitions from empty */
>   			list_move_tail(&ce->signal_link, &b->signalers);
> +		} else if (ce->engine != rq->engine) { /* virtualised */
> +			list_move_tail(&ce->signal_link, &b->signalers);
> +			intel_engine_queue_breadcrumbs(rq->engine);

Is there significance in the check not being based on engine->flags & VIRTUAL?

Actually, the signaling can get enabled either on the virtual or real 
engine, depending on timing. I don't see that irq_enable/disable vfuncs 
will be present on the veng though. So how does that work?

Maybe there is a clue in the presence of intel_engine_queue_breadcrumbs,
but I don't see it.

> +		}
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   		spin_unlock(&b->irq_lock);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d972c339309c..6dceb78e95d7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -227,6 +227,7 @@ struct intel_engine_execlists {
>   	 * @queue: queue of requests, in priority lists
>   	 */
>   	struct rb_root_cached queue;
> +	struct rb_root_cached virtual;
>   
>   	/**
>   	 * @csb_write: control register for Context Switch buffer
> @@ -445,6 +446,7 @@ struct intel_engine_cs {
>   #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
>   #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
>   #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
> +#define I915_ENGINE_IS_VIRTUAL       BIT(5)
>   	unsigned int flags;
>   
>   	/*
> @@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
>   	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   }
>   
> +static inline bool
> +intel_engine_is_virtual(const struct intel_engine_cs *engine)
> +{
> +	return engine->flags & I915_ENGINE_IS_VIRTUAL;
> +}
> +
>   #define instdone_slice_mask(dev_priv__) \
>   	(IS_GEN(dev_priv__, 7) ? \
>   	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 01f58a152a9e..d6efd6aa67cb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -136,6 +136,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "i915_vgpu.h"
> +#include "intel_engine_pm.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_reset.h"
> @@ -165,6 +166,41 @@
>   
>   #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
>   
> +struct virtual_engine {
> +	struct intel_engine_cs base;
> +	struct intel_context context;
> +
> +	/*
> +	 * We allow only a single request through the virtual engine at a time
> +	 * (each request in the timeline waits for the completion fence of
> +	 * the previous before being submitted). By restricting ourselves to
> +	 * only submitting a single request, each request is placed on to a
> +	 * physical engine to maximise load spreading (by virtue of the late
> +	 * greedy scheduling -- each real engine takes the next available
> +	 * request upon idling).
> +	 */
> +	struct i915_request *request;
> +
> +	/*
> +	 * We keep a rbtree of available virtual engines inside each physical
> +	 * engine, sorted by priority. Here we preallocate the nodes we need
> +	 * for the virtual engine, indexed by physical_engine->id.
> +	 */
> +	struct ve_node {
> +		struct rb_node rb;
> +		int prio;
> +	} nodes[I915_NUM_ENGINES];
> +
> +	/* And finally, which physical engines this virtual engine maps onto. */
> +	unsigned int num_siblings;
> +	struct intel_engine_cs *siblings[0];
> +};
> +
> +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
> +{
> +	return container_of(engine, struct virtual_engine, base);
> +}
> +
>   static int execlists_context_deferred_alloc(struct intel_context *ce,
>   					    struct intel_engine_cs *engine);
>   static void execlists_init_reg_state(u32 *reg_state,
> @@ -228,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
>   }
>   
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
> -				const struct i915_request *rq)
> +				const struct i915_request *rq,
> +				struct rb_node *rb)
>   {
>   	int last_prio;
>   
> @@ -263,6 +300,22 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	    rq_prio(list_next_entry(rq, link)) > last_prio)
>   		return true;
>   
> +	if (rb) { /* XXX virtual precedence */
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		bool preempt = false;
> +
> +		if (engine == ve->siblings[0]) { /* only preempt one sibling */
> +			spin_lock(&ve->base.timeline.lock);
> +			if (ve->request)
> +				preempt = rq_prio(ve->request) > last_prio;
> +			spin_unlock(&ve->base.timeline.lock);
> +		}
> +
> +		if (preempt)
> +			return preempt;
> +	}
> +
>   	/*
>   	 * If the inflight context did not trigger the preemption, then maybe
>   	 * it was the set of queued requests? Pick the highest priority in
> @@ -381,6 +434,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   	list_for_each_entry_safe_reverse(rq, rn,
>   					 &engine->timeline.requests,
>   					 link) {
> +		struct intel_engine_cs *owner;
> +
>   		if (i915_request_completed(rq))
>   			break;
>   
> @@ -389,14 +444,30 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   
>   		GEM_BUG_ON(rq->hw_context->active);
>   
> -		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> -		if (rq_prio(rq) != prio) {
> -			prio = rq_prio(rq);
> -			pl = i915_sched_lookup_priolist(engine, prio);
> -		}
> -		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> +		/*
> +		 * Push the request back into the queue for later resubmission.
> +		 * If this request is not native to this physical engine (i.e.
> +		 * it came from a virtual source), push it back onto the virtual
> +		 * engine so that it can be moved across onto another physical
> +		 * engine as load dictates.
> +		 */
> +		owner = rq->hw_context->engine;
> +		if (likely(owner == engine)) {
> +			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> +			if (rq_prio(rq) != prio) {
> +				prio = rq_prio(rq);
> +				pl = i915_sched_lookup_priolist(engine, prio);
> +			}
> +			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
>   
> -		list_add(&rq->sched.link, pl);
> +			list_add(&rq->sched.link, pl);
> +		} else {
> +			if (__i915_request_has_started(rq))
> +				rq->sched.attr.priority |= ACTIVE_PRIORITY;
> +
> +			rq->engine = owner;
> +			owner->submit_request(rq);
> +		}
>   
>   		active = rq;
>   	}
> @@ -658,6 +729,72 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
>   						  execlists));
>   }
>   
> +static void virtual_update_register_offsets(u32 *regs,
> +					    struct intel_engine_cs *engine)
> +{
> +	u32 base = engine->mmio_base;
> +
> +	regs[CTX_CONTEXT_CONTROL] =
> +		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
> +	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
> +	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
> +	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
> +	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
> +
> +	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
> +	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
> +	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
> +	regs[CTX_SECOND_BB_HEAD_U] =
> +		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
> +	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
> +	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
> +
> +	regs[CTX_CTX_TIMESTAMP] =
> +		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
> +	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
> +	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
> +	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
> +	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
> +	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
> +	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
> +	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> +	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> +
> +	if (engine->class == RENDER_CLASS) {
> +		regs[CTX_RCS_INDIRECT_CTX] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
> +		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
> +			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
> +		regs[CTX_BB_PER_CTX_PTR] =
> +			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
> +
> +		regs[CTX_R_PWR_CLK_STATE] =
> +			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +	}
> +}
> +
> +static bool virtual_matches(const struct virtual_engine *ve,
> +			    const struct i915_request *rq,
> +			    const struct intel_engine_cs *engine)
> +{
> +	const struct intel_engine_cs *active;
> +
> +	/*
> +	 * We track when the HW has completed saving the context image
> +	 * (i.e. when we have seen the final CS event switching out of
> +	 * the context) and must not overwrite the context image before
> +	 * then. This restricts us to only using the active engine
> +	 * while the previous virtualized request is inflight (so
> +	 * we reuse the register offsets). This is a very small
> +	 * hysteresis on the greedy selection algorithm.
> +	 */
> +	active = READ_ONCE(ve->context.active);
> +	if (active && active != engine)
> +		return false;
> +
> +	return true;
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -690,6 +827,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	 * and context switches) submission.
>   	 */
>   
> +	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (!rq) { /* lazily cleanup after another engine handled rq */
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		if (!virtual_matches(ve, rq, engine)) {
> +			rb = rb_next(rb);
> +			continue;
> +		}

Can this create a problem where, since the queue is supposed to be in
submission order, skipping some requests here (because the owning veng is
already executing something) inverts the dependency chain?

Before semaphores I guess it wasn't possible, since only runnable requests
would be in the queue, but with semaphores I am not sure. The paranoid
thing would be to not dequeue any other veng request while the engine is
busy with some veng. But maybe that would serialize things too much. Or is
it too paranoid?
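
Something like this in the first peek loop, purely as an untested sketch
of the paranoid option:

		if (!virtual_matches(ve, rq, engine))
			break; /* don't dequeue past a veng busy elsewhere */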

> +
> +		break;
> +	}
> +
>   	if (last) {
>   		/*
>   		 * Don't resubmit or switch until all outstanding
> @@ -711,7 +868,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
>   			return;
>   
> -		if (need_preempt(engine, last)) {
> +		if (need_preempt(engine, last, rb)) {
>   			inject_preempt_context(engine);
>   			return;
>   		}
> @@ -751,6 +908,89 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		last->tail = last->wa_tail;
>   	}
>   
> +	while (rb) { /* XXX virtual is always taking precedence */
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq;
> +
> +		spin_lock(&ve->base.timeline.lock);
> +
> +		rq = ve->request;
> +		if (unlikely(!rq)) { /* lost the race to a sibling */
> +			spin_unlock(&ve->base.timeline.lock);
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +			rb = rb_first_cached(&execlists->virtual);
> +			continue;
> +		}
> +
> +		GEM_BUG_ON(rq != ve->request);
> +		GEM_BUG_ON(rq->engine != &ve->base);
> +		GEM_BUG_ON(rq->hw_context != &ve->context);
> +
> +		if (rq_prio(rq) >= queue_prio(execlists)) {
> +			if (!virtual_matches(ve, rq, engine)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				rb = rb_next(rb);
> +				continue;

Is this needed? The first virtual_matches loop skips all the requests
which shouldn't be dequeued and leaves rb pointing at the top-priority
one which can be.

> +			}
> +
> +			if (last && !can_merge_rq(last, rq)) {
> +				spin_unlock(&ve->base.timeline.lock);
> +				return; /* leave this rq for another engine */
> +			}
> +
> +			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
> +				  engine->name,
> +				  rq->fence.context,
> +				  rq->fence.seqno,
> +				  i915_request_completed(rq) ? "!" :
> +				  i915_request_started(rq) ? "*" :
> +				  "",
> +				  yesno(engine != ve->siblings[0]));
> +
> +			ve->request = NULL;
> +			ve->base.execlists.queue_priority_hint = INT_MIN;
> +			rb_erase_cached(rb, &execlists->virtual);
> +			RB_CLEAR_NODE(rb);
> +
> +			rq->engine = engine;
> +
> +			if (engine != ve->siblings[0]) {
> +				u32 *regs = ve->context.lrc_reg_state;
> +				unsigned int n;
> +
> +				GEM_BUG_ON(READ_ONCE(ve->context.active));
> +				virtual_update_register_offsets(regs, engine);
> +
> +				/*
> +				 * Move the bound engine to the top of the list
> +				 * for future execution. We then kick this
> +				 * tasklet first before checking others, so that
> +				 * we preferentially reuse this set of bound
> +				 * registers.
> +				 */
> +				for (n = 1; n < ve->num_siblings; n++) {
> +					if (ve->siblings[n] == engine) {
> +						swap(ve->siblings[n],
> +						     ve->siblings[0]);
> +						break;
> +					}
> +				}
> +
> +				GEM_BUG_ON(ve->siblings[0] != engine);
> +			}
> +
> +			__i915_request_submit(rq);
> +			trace_i915_request_in(rq, port_index(port, execlists));
> +			submit = true;
> +			last = rq;
> +		}
> +
> +		spin_unlock(&ve->base.timeline.lock);
> +		break;
> +	}
> +
>   	while ((rb = rb_first_cached(&execlists->queue))) {
>   		struct i915_priolist *p = to_priolist(rb);
>   		struct i915_request *rq, *rn;
> @@ -2043,6 +2283,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   		i915_priolist_free(p);
>   	}
>   
> +	/* Cancel all attached virtual engines */
> +	while ((rb = rb_first_cached(&execlists->virtual))) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +
> +		rb_erase_cached(rb, &execlists->virtual);
> +		RB_CLEAR_NODE(rb);
> +
> +		spin_lock(&ve->base.timeline.lock);
> +		if (ve->request) {
> +			ve->request->engine = engine;
> +			__i915_request_submit(ve->request);
> +			dma_fence_set_error(&ve->request->fence, -EIO);
> +			i915_request_mark_complete(ve->request);
> +			ve->base.execlists.queue_priority_hint = INT_MIN;
> +			ve->request = NULL;
> +		}
> +		spin_unlock(&ve->base.timeline.lock);
> +	}
> +
>   	/* Remaining _unready_ requests will be nop'ed when submitted */
>   
>   	execlists->queue_priority_hint = INT_MIN;
> @@ -2779,6 +3039,316 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
>   	return ret;
>   }
>   
> +static void virtual_context_destroy(struct kref *kref)
> +{
> +	struct virtual_engine *ve =
> +		container_of(kref, typeof(*ve), context.ref);
> +	unsigned int n;
> +
> +	GEM_BUG_ON(ve->request);
> +	GEM_BUG_ON(ve->context.active);
> +
> +	for (n = 0; n < ve->num_siblings; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct rb_node *node = &ve->nodes[sibling->id].rb;
> +
> +		if (RB_EMPTY_NODE(node))
> +			continue;
> +
> +		spin_lock_irq(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(node))
> +			rb_erase_cached(node, &sibling->execlists.virtual);

A consequence of rq duplication across physical engines? Leave a comment?

> +
> +		spin_unlock_irq(&sibling->timeline.lock);
> +	}
> +	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
> +
> +	if (ve->context.state)
> +		__execlists_context_fini(&ve->context);
> +
> +	i915_timeline_fini(&ve->base.timeline);
> +	kfree(ve);
> +}
> +
> +static void virtual_engine_initial_hint(struct virtual_engine *ve)
> +{
> +	int swp;
> +
> +	/*
> +	 * Pick a random sibling on starting to help spread the load around.
> +	 *
> +	 * New contexts are typically created with exactly the same order
> +	 * of siblings, and often started in batches. Due to the way we iterate
> +	 * the array of siblings when submitting requests, sibling[0] is
> +	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
> +	 * randomised across the system, we also help spread the load by the
> +	 * first engine we inspect being different each time.
> +	 *
> +	 * NB This does not force us to execute on this engine, it will just
> +	 * typically be the first we inspect for submission.
> +	 */
> +	swp = prandom_u32_max(ve->num_siblings);
> +	if (!swp)
> +		return;
> +
> +	swap(ve->siblings[swp], ve->siblings[0]);
> +	virtual_update_register_offsets(ve->context.lrc_reg_state,
> +					ve->siblings[0]);
> +}
> +
> +static int virtual_context_pin(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	int err;
> +
> +	/* Note: we must use a real engine class for setting up reg state */
> +	err = __execlists_context_pin(ce, ve->siblings[0]);
> +	if (err)
> +		return err;
> +
> +	virtual_engine_initial_hint(ve);
> +	return 0;
> +}
> +
> +static void virtual_context_enter(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	unsigned int n;
> +
> +	for (n = 0; n < ve->num_siblings; n++)
> +		intel_engine_pm_get(ve->siblings[n]);
> +}
> +
> +static void virtual_context_exit(struct intel_context *ce)
> +{
> +	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> +	unsigned int n;
> +
> +	for (n = 0; n < ve->num_siblings; n++)
> +		intel_engine_pm_put(ve->siblings[n]);
> +}
> +
> +static const struct intel_context_ops virtual_context_ops = {
> +	.pin = virtual_context_pin,
> +	.unpin = execlists_context_unpin,
> +
> +	.enter = virtual_context_enter,
> +	.exit = virtual_context_exit,
> +
> +	.destroy = virtual_context_destroy,
> +};
> +
> +static void virtual_submission_tasklet(unsigned long data)
> +{
> +	struct virtual_engine * const ve = (struct virtual_engine *)data;
> +	const int prio = ve->base.execlists.queue_priority_hint;
> +	unsigned int n;
> +
> +	local_irq_disable();
> +	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
> +		struct intel_engine_cs *sibling = ve->siblings[n];
> +		struct ve_node * const node = &ve->nodes[sibling->id];
> +		struct rb_node **parent, *rb;
> +		bool first;
> +
> +		spin_lock(&sibling->timeline.lock);
> +
> +		if (!RB_EMPTY_NODE(&node->rb)) {
> +			/*
> +			 * Cheat and avoid rebalancing the tree if we can
> +			 * reuse this node in situ.
> +			 */
> +			first = rb_first_cached(&sibling->execlists.virtual) ==
> +				&node->rb;
> +			if (prio == node->prio || (prio > node->prio && first))
> +				goto submit_engine;
> +
> +			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
> +		}
> +
> +		rb = NULL;
> +		first = true;
> +		parent = &sibling->execlists.virtual.rb_root.rb_node;
> +		while (*parent) {
> +			struct ve_node *other;
> +
> +			rb = *parent;
> +			other = rb_entry(rb, typeof(*other), rb);
> +			if (prio > other->prio) {
> +				parent = &rb->rb_left;
> +			} else {
> +				parent = &rb->rb_right;
> +				first = false;
> +			}
> +		}
> +
> +		rb_link_node(&node->rb, rb, parent);
> +		rb_insert_color_cached(&node->rb,
> +				       &sibling->execlists.virtual,
> +				       first);
> +
> +submit_engine:
> +		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
> +		node->prio = prio;
> +		if (first && prio > sibling->execlists.queue_priority_hint) {
> +			sibling->execlists.queue_priority_hint = prio;
> +			tasklet_hi_schedule(&sibling->execlists.tasklet);
> +		}
> +
> +		spin_unlock(&sibling->timeline.lock);
> +	}
> +	local_irq_enable();
> +}
> +
> +static void virtual_submit_request(struct i915_request *rq)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +
> +	GEM_TRACE("%s: rq=%llx:%lld\n",
> +		  ve->base.name,
> +		  rq->fence.context,
> +		  rq->fence.seqno);
> +
> +	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
> +
> +	GEM_BUG_ON(ve->request);
> +	ve->base.execlists.queue_priority_hint = rq_prio(rq);
> +	WRITE_ONCE(ve->request, rq);
> +
> +	tasklet_schedule(&ve->base.execlists.tasklet);
> +}
> +
> +struct intel_context *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count)
> +{
> +	struct virtual_engine *ve;
> +	unsigned int n;
> +	int err;
> +
> +	if (count == 0)
> +		return ERR_PTR(-EINVAL);
> +
> +	if (count == 1)
> +		return intel_context_create(ctx, siblings[0]);
> +
> +	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
> +	if (!ve)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ve->base.i915 = ctx->i915;
> +	ve->base.id = -1;
> +	ve->base.class = OTHER_CLASS;
> +	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
> +	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> +	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
> +
> +	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
> +
> +	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
> +	if (err)
> +		goto err_put;
> +	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
> +
> +	ve->base.cops = &virtual_context_ops;
> +	ve->base.request_alloc = execlists_request_alloc;
> +
> +	ve->base.schedule = i915_schedule;
> +	ve->base.submit_request = virtual_submit_request;
> +
> +	ve->base.execlists.queue_priority_hint = INT_MIN;
> +	tasklet_init(&ve->base.execlists.tasklet,
> +		     virtual_submission_tasklet,
> +		     (unsigned long)ve);
> +
> +	intel_context_init(&ve->context, ctx, &ve->base);
> +
> +	for (n = 0; n < count; n++) {
> +		struct intel_engine_cs *sibling = siblings[n];
> +
> +		GEM_BUG_ON(!is_power_of_2(sibling->mask));
> +		if (sibling->mask & ve->base.mask) {
> +			DRM_DEBUG("duplicate %s entry in load balancer\n",
> +				  sibling->name);
> +			err = -EINVAL;
> +			goto err_put;
> +		}
> +
> +		/*
> +		 * The virtual engine implementation is tightly coupled to
> +		 * the execlists backend -- we push the request directly
> +		 * into a tree inside each physical engine. We could support
> +		 * layering if we handle cloning of the requests and
> +		 * submitting a copy into each backend.
> +		 */
> +		if (sibling->execlists.tasklet.func !=
> +		    execlists_submission_tasklet) {
> +			err = -ENODEV;
> +			goto err_put;
> +		}
> +
> +		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
> +		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
> +
> +		ve->siblings[ve->num_siblings++] = sibling;
> +		ve->base.mask |= sibling->mask;
> +
> +		/*
> +		 * All physical engines must be compatible for their emission
> +		 * functions (as we build the instructions during request
> +		 * construction and do not alter them before submission
> +		 * on the physical engine). We use the engine class as a guide
> +		 * here, although that could be refined.
> +		 */
> +		if (ve->base.class != OTHER_CLASS) {
> +			if (ve->base.class != sibling->class) {
> +				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> +					  sibling->class, ve->base.class);
> +				err = -EINVAL;
> +				goto err_put;
> +			}
> +			continue;
> +		}
> +
> +		ve->base.class = sibling->class;
> +		ve->base.uabi_class = sibling->uabi_class;
> +		snprintf(ve->base.name, sizeof(ve->base.name),
> +			 "v%dx%d", ve->base.class, count);
> +		ve->base.context_size = sibling->context_size;
> +
> +		ve->base.emit_bb_start = sibling->emit_bb_start;
> +		ve->base.emit_flush = sibling->emit_flush;
> +		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
> +		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
> +		ve->base.emit_fini_breadcrumb_dw =
> +			sibling->emit_fini_breadcrumb_dw;
> +	}
> +
> +	return &ve->context;
> +
> +err_put:
> +	intel_context_put(&ve->context);
> +	return ERR_PTR(err);
> +}
> +
> +struct intel_context *
> +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> +			      struct intel_engine_cs *src)
> +{
> +	struct virtual_engine *se = to_virtual_engine(src);
> +	struct intel_context *dst;
> +
> +	dst = intel_execlists_create_virtual(ctx,
> +					     se->siblings,
> +					     se->num_siblings);
> +	if (IS_ERR(dst))
> +		return dst;
> +
> +	return dst;
> +}
> +
>   void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   				   struct drm_printer *m,
>   				   void (*show_request)(struct drm_printer *m,
> @@ -2836,6 +3406,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   		show_request(m, last, "\t\tQ ");
>   	}
>   
> +	last = NULL;
> +	count = 0;
> +	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
> +		struct virtual_engine *ve =
> +			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> +		struct i915_request *rq = READ_ONCE(ve->request);
> +
> +		if (rq) {
> +			if (count++ < max - 1)
> +				show_request(m, rq, "\t\tV ");
> +			else
> +				last = rq;
> +		}
> +	}
> +	if (last) {
> +		if (count > max) {
> +			drm_printf(m,
> +				   "\t\t...skipping %d virtual requests...\n",
> +				   count - max);
> +		}
> +		show_request(m, last, "\t\tV ");
> +	}
> +
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index a0dc907a7249..5530606052e5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   							const char *prefix),
>   				   unsigned int max);
>   
> +struct intel_context *
> +intel_execlists_create_virtual(struct i915_gem_context *ctx,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int count);
> +
> +struct intel_context *
> +intel_execlists_clone_virtual(struct i915_gem_context *ctx,
> +			      struct intel_engine_cs *src);
> +
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 84538f69185b..f34aa9e042a3 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1301,6 +1301,185 @@ static int live_preempt_smoke(void *arg)
>   	return err;
>   }
>   
> +static int nop_virtual_engine(struct drm_i915_private *i915,
> +			      struct intel_engine_cs **siblings,
> +			      unsigned int nsibling,
> +			      unsigned int nctx,
> +			      unsigned int flags)
> +#define CHAIN BIT(0)
> +{
> +	IGT_TIMEOUT(end_time);
> +	struct i915_request *request[16];
> +	struct i915_gem_context *ctx[16];
> +	struct intel_context *ve[16];
> +	unsigned long n, prime, nc;
> +	struct igt_live_test t;
> +	ktime_t times[2] = {};
> +	int err;
> +
> +	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
> +
> +	for (n = 0; n < nctx; n++) {
> +		ctx[n] = kernel_context(i915);
> +		if (!ctx[n]) {
> +			err = -ENOMEM;
> +			nctx = n;
> +			goto out;
> +		}
> +
> +		ve[n] = intel_execlists_create_virtual(ctx[n],
> +						       siblings, nsibling);
> +		if (IS_ERR(ve[n])) {
> +			kernel_context_close(ctx[n]);
> +			err = PTR_ERR(ve[n]);
> +			nctx = n;
> +			goto out;
> +		}
> +
> +		err = intel_context_pin(ve[n]);
> +		if (err) {
> +			intel_context_put(ve[n]);
> +			kernel_context_close(ctx[n]);
> +			nctx = n;
> +			goto out;
> +		}
> +	}
> +
> +	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
> +	if (err)
> +		goto out;
> +
> +	for_each_prime_number_from(prime, 1, 8192) {
> +		times[1] = ktime_get_raw();
> +
> +		if (flags & CHAIN) {
> +			for (nc = 0; nc < nctx; nc++) {
> +				for (n = 0; n < prime; n++) {
> +					request[nc] =
> +						i915_request_create(ve[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		} else {
> +			for (n = 0; n < prime; n++) {
> +				for (nc = 0; nc < nctx; nc++) {
> +					request[nc] =
> +						i915_request_create(ve[nc]);
> +					if (IS_ERR(request[nc])) {
> +						err = PTR_ERR(request[nc]);
> +						goto out;
> +					}
> +
> +					i915_request_add(request[nc]);
> +				}
> +			}
> +		}
> +
> +		for (nc = 0; nc < nctx; nc++) {
> +			if (i915_request_wait(request[nc],
> +					      I915_WAIT_LOCKED,
> +					      HZ / 10) < 0) {
> +				pr_err("%s(%s): wait for %llx:%lld timed out\n",
> +				       __func__, ve[0]->engine->name,
> +				       request[nc]->fence.context,
> +				       request[nc]->fence.seqno);
> +
> +				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> +					  __func__, ve[0]->engine->name,
> +					  request[nc]->fence.context,
> +					  request[nc]->fence.seqno);
> +				GEM_TRACE_DUMP();
> +				i915_gem_set_wedged(i915);
> +				break;
> +			}
> +		}
> +
> +		times[1] = ktime_sub(ktime_get_raw(), times[1]);
> +		if (prime == 1)
> +			times[0] = times[1];
> +
> +		if (__igt_timeout(end_time, NULL))
> +			break;
> +	}
> +
> +	err = igt_live_test_end(&t);
> +	if (err)
> +		goto out;
> +
> +	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
> +		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
> +		prime, div64_u64(ktime_to_ns(times[1]), prime));
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (nc = 0; nc < nctx; nc++) {
> +		intel_context_unpin(ve[nc]);
> +		intel_context_put(ve[nc]);
> +		kernel_context_close(ctx[nc]);
> +	}
> +	return err;
> +}
> +
> +static int live_virtual_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	unsigned int class, inst;
> +	int err = -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for_each_engine(engine, i915, id) {
> +		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
> +		if (err) {
> +			pr_err("Failed to wrap engine %s: err=%d\n",
> +			       engine->name, err);
> +			goto out_unlock;
> +		}
> +	}
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		int nsibling, n;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		for (n = 1; n <= nsibling + 1; n++) {
> +			err = nop_virtual_engine(i915, siblings, nsibling,
> +						 n, 0);
> +			if (err)
> +				goto out_unlock;
> +		}
> +
> +		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1313,6 +1492,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_chain_preempt),
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
> +		SUBTEST(live_virtual_engine),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 67f8a4a807a0..fe82d3571072 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
>   	return atomic_dec_and_test(&t->count);
>   }
>   
> +static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
> +{
> +	return test_bit(TASKLET_STATE_SCHED, &t->state);
> +}
> +
>   #endif /* __I915_GEM_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ba7582d955d1..57b09f624bb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,6 +86,7 @@
>    */
>   
>   #include <linux/log2.h>
> +#include <linux/nospec.h>
>   
>   #include <drm/i915_drm.h>
>   
> @@ -1209,7 +1210,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
>   	int ret;
>   
>   	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
> -	GEM_BUG_ON(ce->engine->id != RCS0);
>   
>   	ret = intel_context_lock_pinned(ce);
>   	if (ret)
> @@ -1397,7 +1397,102 @@ struct set_engines {
>   	struct i915_gem_engines *engines;
>   };
>   
> +static int
> +set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> +{
> +	struct i915_context_engines_load_balance __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_engines *set = data;
> +	struct intel_engine_cs *stack[16];
> +	struct intel_engine_cs **siblings;
> +	struct intel_context *ce;
> +	u16 num_siblings, idx;
> +	unsigned int n;
> +	int err;
> +
> +	if (!HAS_EXECLISTS(set->ctx->i915))
> +		return -ENODEV;
> +
> +	if (USES_GUC_SUBMISSION(set->ctx->i915))
> +		return -ENODEV; /* not implemented yet */
> +
> +	if (get_user(idx, &ext->engine_index))
> +		return -EFAULT;
> +
> +	if (idx >= set->engines->num_engines) {
> +		DRM_DEBUG("Invalid placement value, %d >= %d\n",
> +			  idx, set->engines->num_engines);
> +		return -EINVAL;
> +	}
> +
> +	idx = array_index_nospec(idx, set->engines->num_engines);
> +	if (set->engines->engines[idx]) {
> +		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
> +		return -EEXIST;
> +	}
> +
> +	if (get_user(num_siblings, &ext->num_siblings))
> +		return -EFAULT;
> +
> +	err = check_user_mbz(&ext->flags);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> +		err = check_user_mbz(&ext->mbz64[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	siblings = stack;
> +	if (num_siblings > ARRAY_SIZE(stack)) {
> +		siblings = kmalloc_array(num_siblings,
> +					 sizeof(*siblings),
> +					 GFP_KERNEL);
> +		if (!siblings)
> +			return -ENOMEM;
> +	}
> +
> +	for (n = 0; n < num_siblings; n++) {
> +		struct i915_engine_class_instance ci;
> +
> +		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
> +			err = -EFAULT;
> +			goto out_siblings;
> +		}
> +
> +		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
> +						       ci.engine_class,
> +						       ci.engine_instance);
> +		if (!siblings[n]) {
> +			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
> +				  n, ci.engine_class, ci.engine_instance);
> +			err = -EINVAL;
> +			goto out_siblings;
> +		}
> +	}
> +
> +	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
> +	if (IS_ERR(ce)) {
> +		err = PTR_ERR(ce);
> +		goto out_siblings;
> +	}
> +
> +	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
> +		intel_context_put(ce);
> +		err = -EEXIST;
> +		goto out_siblings;
> +	}
> +
> +out_siblings:
> +	if (siblings != stack)
> +		kfree(siblings);
> +
> +	return err;
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
> +	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
>   };
>   
>   static int
> @@ -1696,14 +1791,29 @@ static int clone_engines(struct i915_gem_context *dst,
>   
>   	clone->i915 = dst->i915;
>   	for (n = 0; n < e->num_engines; n++) {
> +		struct intel_engine_cs *engine;
> +
>   		if (!e->engines[n]) {
>   			clone->engines[n] = NULL;
>   			continue;
>   		}
> +		engine = e->engines[n]->engine;
>   
> -		clone->engines[n] =
> -			intel_context_create(dst, e->engines[n]->engine);
> -		if (!clone->engines[n]) {
> +		/*
> +		 * Virtual engines are singletons; they can only exist
> +		 * inside a single context, because they embed their
> +		 * HW context... As each virtual context implies a single
> +		 * timeline (each engine can only dequeue a single request
> +		 * at any time), it would be surprising for two contexts
> +		 * to use the same engine. So let's create a copy of
> +		 * the virtual engine instead.
> +		 */
> +		if (intel_engine_is_virtual(engine))
> +			clone->engines[n] =
> +				intel_execlists_clone_virtual(dst, engine);
> +		else
> +			clone->engines[n] = intel_context_create(dst, engine);
> +		if (IS_ERR_OR_NULL(clone->engines[n])) {
>   			__free_engines(clone, n);
>   			goto err_unlock;
>   		}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 39bc4f54e272..b58d9c23a876 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -248,17 +248,26 @@ sched_lock_engine(const struct i915_sched_node *node,
>   		  struct intel_engine_cs *locked,
>   		  struct sched_cache *cache)
>   {
> -	struct intel_engine_cs *engine = node_to_request(node)->engine;
> +	const struct i915_request *rq = node_to_request(node);
> +	struct intel_engine_cs *engine;
>   
>   	GEM_BUG_ON(!locked);
>   
> -	if (engine != locked) {
> +	/*
> +	 * Virtual engines complicate acquiring the engine timeline lock,
> +	 * as their rq->engine pointer is not stable until under that
> +	 * engine lock. The simple ploy we use is to take the lock then
> +	 * check that the rq still belongs to the newly locked engine.
> +	 */
> +	while (locked != (engine = READ_ONCE(rq->engine))) {
>   		spin_unlock(&locked->timeline.lock);
>   		memset(cache, 0, sizeof(*cache));
>   		spin_lock(&engine->timeline.lock);
> +		locked = engine;
>   	}
>   
> -	return engine;
> +	GEM_BUG_ON(locked != engine);
> +	return locked;
>   }
>   
>   static bool inflight(const struct i915_request *rq,
> @@ -371,8 +380,11 @@ static void __i915_schedule(struct i915_request *rq,
>   		if (prio <= node->attr.priority || node_signaled(node))
>   			continue;
>   
> +		GEM_BUG_ON(node_to_request(node)->engine != engine);
> +
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
> +			GEM_BUG_ON(intel_engine_is_virtual(engine));
>   			if (!cache.priolist)
>   				cache.priolist =
>   					i915_sched_lookup_priolist(engine,
> diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
> index 5256a0b5c5f7..1688705f4a2b 100644
> --- a/drivers/gpu/drm/i915/i915_timeline_types.h
> +++ b/drivers/gpu/drm/i915/i915_timeline_types.h
> @@ -26,6 +26,7 @@ struct i915_timeline {
>   	spinlock_t lock;
>   #define TIMELINE_CLIENT 0 /* default subclass */
>   #define TIMELINE_ENGINE 1
> +#define TIMELINE_VIRTUAL 2
>   	struct mutex mutex; /* protects the flow of requests */
>   
>   	unsigned int pin_count;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7694113362d4..ff2ababc0984 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -137,6 +137,7 @@ struct i915_engine_class_instance {
>   	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
>   	__u16 engine_instance;
>   #define I915_ENGINE_CLASS_INVALID_NONE -1
> +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
>   };
>   
>   /**
> @@ -1607,8 +1608,46 @@ struct drm_i915_gem_context_param_sseu {
>   	__u32 rsvd;
>   };
>   
> +/*
> + * i915_context_engines_load_balance:
> + *
> + * Enable load balancing across this set of engines.
> + *
> + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
> + * used will proxy the execbuffer request onto one of the set of engines
> + * in such a way as to distribute the load evenly across the set.
> + *
> + * The set of engines must be compatible (e.g. the same HW class) as they
> + * will share the same logical GPU context and ring.
> + *
> + * To intermix rendering with the virtual engine and direct rendering onto
> + * the backing engines (bypassing the load balancing proxy), the context must
> + * be defined to use a single timeline for all engines.
> + */
> +struct i915_context_engines_load_balance {
> +	struct i915_user_extension base;
> +
> +	__u16 engine_index;
> +	__u16 num_siblings;
> +	__u32 flags; /* all undefined flags must be zero */
> +
> +	__u64 mbz64[1]; /* reserved for future use; must be zero */

Why an array if only one? Should we add a few more just in case?
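
For instance (size picked arbitrarily, just a suggestion):

	__u64 mbz64[4]; /* reserved for future use; must be zero */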

> +
> +	struct i915_engine_class_instance engines[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
> +	struct i915_user_extension base; \
> +	__u16 engine_index; \
> +	__u16 num_siblings; \
> +	__u32 flags; \
> +	__u64 mbz64[1]; \
> +	struct i915_engine_class_instance engines[N__]; \
> +} __attribute__((packed)) name__
> +
>   struct i915_context_param_engines {
>   	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
>   	struct i915_engine_class_instance engines[0];
>   } __attribute__((packed));
>   
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (31 preceding siblings ...)
  2019-04-17  8:46 ` [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
@ 2019-04-17 11:33 ` Patchwork
  2019-04-18 10:32 ` [PATCH 01/32] " Tvrtko Ursulin
  2019-04-23 12:59 ` Tvrtko Ursulin
  34 siblings, 0 replies; 68+ messages in thread
From: Patchwork @ 2019-04-17 11:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
URL   : https://patchwork.freedesktop.org/series/59636/
State : failure

== Summary ==

Applying: drm/i915: Seal races between async GPU cancellation, retirement and signaling
Applying: drm/i915: Verify workarounds immediately after application
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/intel_workarounds.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_workarounds.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/intel_workarounds.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0002 drm/i915: Verify workarounds immediately after application
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17  7:56 ` [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
@ 2019-04-17 11:43   ` Tvrtko Ursulin
  2019-04-17 11:57     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17 11:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Allow the user to direct which physical engines of the virtual engine
> they wish to execute on, as sometimes it is necessary to override the
> load balancing algorithm.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
>   drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_request.c    |   1 +
>   drivers/gpu/drm/i915/i915_request.h    |   3 +
>   4 files changed, 193 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index d6efd6aa67cb..560a18bb4cbb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>   	intel_engine_context_out(rq->engine);
>   	execlists_context_status_change(rq, status);
>   	trace_i915_request_out(rq);
> +
> +	/*
> +	 * If this is part of a virtual engine, its next request may have
> +	 * been blocked waiting for access to the active context. We have
> +	 * to kick all the siblings again in case we need to switch (e.g.
> +	 * the next request is not runnable on this engine). Hopefully,
> +	 * we will already have submitted the next request before the
> +	 * tasklet runs and do not need to rebuild each virtual tree
> +	 * and kick everyone again.
> +	 */
> +	if (rq->engine != rq->hw_context->engine)
> +		tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);

Is this needed only for non-default execution_mask? If so it would be 
good to limit it to avoid tasklet storm with plain veng.

>   }
>   
>   static u64 execlists_update_context(struct i915_request *rq)
> @@ -779,6 +791,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
>   {
>   	const struct intel_engine_cs *active;
>   
> +	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
> +		return false;
> +
>   	/*
>   	 * We track when the HW has completed saving the context image
>   	 * (i.e. when we have seen the final CS event switching out of
> @@ -3139,12 +3154,44 @@ static const struct intel_context_ops virtual_context_ops = {
>   	.destroy = virtual_context_destroy,
>   };
>   
> +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> +{
> +	struct i915_request *rq;
> +	intel_engine_mask_t mask;

intel_engine_mask_t throughout is the wrong type for this, even if we 
disallowed duplicate siblings, and even more so if we don't.

Either way it seems like the 64 sibling limit needs to be back. Or maybe 
only in the bonding case?

> +
> +	rq = READ_ONCE(ve->request);
> +	if (!rq)
> +		return 0;
> +
> +	/* The rq is ready for submission; rq->execution_mask is now stable. */
> +	mask = rq->execution_mask;
> +	if (unlikely(!mask)) {
> +		/* Invalid selection, submit to a random engine in error */
> +		i915_request_skip(rq, -ENODEV);

When can this happen? It looks like if it can happen we should reject it 
earlier. Or if it can't then just assert.
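
That is, if it really cannot happen, just something like:

	GEM_BUG_ON(!mask);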

> +		mask = ve->siblings[0]->mask;
> +	}
> +
> +	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> +		  ve->base.name,
> +		  rq->fence.context, rq->fence.seqno,
> +		  mask, ve->base.execlists.queue_priority_hint);
> +
> +	return mask;
> +}
> +
>   static void virtual_submission_tasklet(unsigned long data)
>   {
>   	struct virtual_engine * const ve = (struct virtual_engine *)data;
>   	const int prio = ve->base.execlists.queue_priority_hint;
> +	intel_engine_mask_t mask;
>   	unsigned int n;
>   
> +	rcu_read_lock();
> +	mask = virtual_submission_mask(ve);
> +	rcu_read_unlock();

What is the RCU for?

> +	if (unlikely(!mask))
> +		return;
> +
>   	local_irq_disable();
>   	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
>   		struct intel_engine_cs *sibling = ve->siblings[n];
> @@ -3152,6 +3199,17 @@ static void virtual_submission_tasklet(unsigned long data)
>   		struct rb_node **parent, *rb;
>   		bool first;
>   
> +		if (unlikely(!(mask & sibling->mask))) {
> +			if (!RB_EMPTY_NODE(&node->rb)) {
> +				spin_lock(&sibling->timeline.lock);
> +				rb_erase_cached(&node->rb,
> +						&sibling->execlists.virtual);
> +				RB_CLEAR_NODE(&node->rb);
> +				spin_unlock(&sibling->timeline.lock);
> +			}
> +			continue;
> +		}
> +
>   		spin_lock(&sibling->timeline.lock);
>   
>   		if (!RB_EMPTY_NODE(&node->rb)) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index f34aa9e042a3..209e51ef13e6 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1480,6 +1480,136 @@ static int live_virtual_engine(void *arg)
>   	return err;
>   }
>   
> +static int mask_virtual_engine(struct drm_i915_private *i915,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int nsibling)
> +{
> +	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
> +	struct i915_gem_context *ctx;
> +	struct intel_context *ve;
> +	struct igt_live_test t;
> +	unsigned int n;
> +	int err;
> +
> +	/*
> +	 * Check that by setting the execution mask on a request, we can
> +	 * restrict it to our desired engine within the virtual engine.
> +	 */
> +
> +	ctx = kernel_context(i915);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
> +	if (IS_ERR(ve)) {
> +		err = PTR_ERR(ve);
> +		goto out_close;
> +	}
> +
> +	err = intel_context_pin(ve);
> +	if (err)
> +		goto out_put;
> +
> +	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
> +	if (err)
> +		goto out_unpin;
> +
> +	for (n = 0; n < nsibling; n++) {
> +		request[n] = i915_request_create(ve);
> +		if (IS_ERR(request[n])) {
> +			err = PTR_ERR(request[n]);
> +			nsibling = n;
> +			goto out;
> +		}
> +
> +		/* Reverse order as it's more likely to be unnatural */
> +		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
> +
> +		i915_request_get(request[n]);
> +		i915_request_add(request[n]);
> +	}
> +
> +	for (n = 0; n < nsibling; n++) {
> +		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
> +			pr_err("%s(%s): wait for %llx:%lld timed out\n",
> +			       __func__, ve->engine->name,
> +			       request[n]->fence.context,
> +			       request[n]->fence.seqno);
> +
> +			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
> +				  __func__, ve->engine->name,
> +				  request[n]->fence.context,
> +				  request[n]->fence.seqno);
> +			GEM_TRACE_DUMP();
> +			i915_gem_set_wedged(i915);
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		if (request[n]->engine != siblings[nsibling - n - 1]) {
> +			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
> +			       request[n]->engine->name,
> +			       siblings[nsibling - n - 1]->name);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	err = igt_live_test_end(&t);
> +	if (err)
> +		goto out;
> +
> +out:
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	for (n = 0; n < nsibling; n++)
> +		i915_request_put(request[n]);
> +
> +out_unpin:
> +	intel_context_unpin(ve);
> +out_put:
> +	intel_context_put(ve);
> +out_close:
> +	kernel_context_close(ctx);
> +	return err;
> +}
> +
> +static int live_virtual_mask(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +	int err = 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		unsigned int nsibling;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		err = mask_virtual_engine(i915, siblings, nsibling);
> +		if (err)
> +			goto out_unlock;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1493,6 +1623,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
>   		SUBTEST(live_virtual_engine),
> +		SUBTEST(live_virtual_mask),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 46f4fc2a8840..78c07e131521 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -688,6 +688,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	rq->batch = NULL;
>   	rq->capture_list = NULL;
>   	rq->waitboost = false;
> +	rq->execution_mask = ALL_ENGINES;
>   
>   	INIT_LIST_HEAD(&rq->active_list);
>   	INIT_LIST_HEAD(&rq->execute_cb);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 8025a89b5999..d7f9b2194568 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -28,6 +28,8 @@
>   #include <linux/dma-fence.h>
>   #include <linux/lockdep.h>
>   
> +#include "gt/intel_engine_types.h"
> +
>   #include "i915_gem.h"
>   #include "i915_scheduler.h"
>   #include "i915_selftest.h"
> @@ -156,6 +158,7 @@ struct i915_request {
>   	 */
>   	struct i915_sched_node sched;
>   	struct i915_dependency dep;
> +	intel_engine_mask_t execution_mask;
>   
>   	/*
>   	 * A convenience pointer to the current breadcrumb value stored in
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17 11:43   ` Tvrtko Ursulin
@ 2019-04-17 11:57     ` Chris Wilson
  2019-04-17 12:35       ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17 11:57 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-17 12:43:49)
> 
> On 17/04/2019 08:56, Chris Wilson wrote:
> > Allow the user to direct which physical engines of the virtual engine
> > they wish to execute on, as sometimes it is necessary to override the
> > load balancing algorithm.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_request.c    |   1 +
> >   drivers/gpu/drm/i915/i915_request.h    |   3 +
> >   4 files changed, 193 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index d6efd6aa67cb..560a18bb4cbb 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
> >       intel_engine_context_out(rq->engine);
> >       execlists_context_status_change(rq, status);
> >       trace_i915_request_out(rq);
> > +
> > +     /*
> > +      * If this is part of a virtual engine, its next request may have
> > +      * been blocked waiting for access to the active context. We have
> > +      * to kick all the siblings again in case we need to switch (e.g.
> > +      * the next request is not runnable on this engine). Hopefully,
> > +      * we will already have submitted the next request before the
> > +      * tasklet runs and do not need to rebuild each virtual tree
> > +      * and kick everyone again.
> > +      */
> > +     if (rq->engine != rq->hw_context->engine)
> > +             tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
> 
> Is this needed only for non-default execution_mask? If so it would be 
> good to limit it to avoid tasklet storm with plain veng.

The issue is not just with this rq but the next one. If that has a
restricted mask that prevents it running on this engine, we may have
missed the opportunity to queue it (and so never run it under just the
right circumstances).

Something like
	to_virtual_engine(rq->hw_context->engine)->request->execution_mask & ~rq->execution_mask

The storm isn't quite so bad, it's only on context-out, and we often do
succeed in keeping it busy. I was just trying to avoid pulling in ve here.
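
Untested, but I had something along these lines in mind (at the cost of
reaching into the ve internals from context_schedule_out):

	if (rq->engine != rq->hw_context->engine) {
		struct virtual_engine *ve =
			to_virtual_engine(rq->hw_context->engine);
		struct i915_request *next = READ_ONCE(ve->request);

		/* Kick only if the pending request may need another engine */
		if (next && next->execution_mask & ~rq->execution_mask)
			tasklet_schedule(&ve->base.execlists.tasklet);
	}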

> >   static u64 execlists_update_context(struct i915_request *rq)
> > @@ -779,6 +791,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
> >   {
> >       const struct intel_engine_cs *active;
> >   
> > +     if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
> > +             return false;
> > +
> >       /*
> >        * We track when the HW has completed saving the context image
> >        * (i.e. when we have seen the final CS event switching out of
> > @@ -3139,12 +3154,44 @@ static const struct intel_context_ops virtual_context_ops = {
> >       .destroy = virtual_context_destroy,
> >   };
> >   
> > +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
> > +{
> > +     struct i915_request *rq;
> > +     intel_engine_mask_t mask;
> 
> intel_engine_mask_t throughout is the wrong type for this, even if we 
> disallowed duplicate siblings, and even more so if we don't.

Why? It's a mask of engine->mask (and has to be of the physical id to
allow it to be position invariant in siblings[]).
 
> Either way it seems like the 64 sibling limit needs to be back. Or maybe 
> only in the bonding case?

?

> 
> > +
> > +     rq = READ_ONCE(ve->request);
> > +     if (!rq)
> > +             return 0;
> > +
> > +     /* The rq is ready for submission; rq->execution_mask is now stable. */
> > +     mask = rq->execution_mask;
> > +     if (unlikely(!mask)) {
> > +             /* Invalid selection, submit to a random engine in error */
> > +             i915_request_skip(rq, -ENODEV);
> 
> When can this happen? It looks like if it can happen we should reject it 
> earlier. Or if it can't then just assert.

Many submit fences can end up with an intersection of 0. This is the
convenient point to do the rejection, as with any other asynchronous
error.

> > +             mask = ve->siblings[0]->mask;
> > +     }
> > +
> > +     GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> > +               ve->base.name,
> > +               rq->fence.context, rq->fence.seqno,
> > +               mask, ve->base.execlists.queue_priority_hint);
> > +
> > +     return mask;
> > +}
> > +
> >   static void virtual_submission_tasklet(unsigned long data)
> >   {
> >       struct virtual_engine * const ve = (struct virtual_engine *)data;
> >       const int prio = ve->base.execlists.queue_priority_hint;
> > +     intel_engine_mask_t mask;
> >       unsigned int n;
> >   
> > +     rcu_read_lock();
> > +     mask = virtual_submission_mask(ve);
> > +     rcu_read_unlock();
> 
> What is the RCU for?

Accessing ve->request. There's nothing stopping another engine from
spotting the ve->request still in its tree, submitting it and it being
retired all during the read here.
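
A sketch of the read side the rcu_read_lock is protecting (assuming, as
i915 does, that requests come from a SLAB_TYPESAFE_BY_RCU cache, so the
memory stays valid even if the request is retired under us):

        struct i915_request *rq;
        intel_engine_mask_t mask;

        rcu_read_lock();
        rq = READ_ONCE(ve->request);    /* may be submitted/retired now */
        mask = rq ? rq->execution_mask : 0; /* best-effort snapshot */
        rcu_read_unlock();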
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17 11:57     ` Chris Wilson
@ 2019-04-17 12:35       ` Tvrtko Ursulin
  2019-04-17 12:46         ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17 12:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 12:57, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-17 12:43:49)
>>
>> On 17/04/2019 08:56, Chris Wilson wrote:
>>> Allow the user to direct which physical engines of the virtual engine
>>> they wish to execute on, as sometimes it is necessary to override the
>>> load balancing algorithm.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
>>>    drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
>>>    drivers/gpu/drm/i915/i915_request.c    |   1 +
>>>    drivers/gpu/drm/i915/i915_request.h    |   3 +
>>>    4 files changed, 193 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> index d6efd6aa67cb..560a18bb4cbb 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> @@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>>>        intel_engine_context_out(rq->engine);
>>>        execlists_context_status_change(rq, status);
>>>        trace_i915_request_out(rq);
>>> +
>>> +     /*
>>> +      * If this is part of a virtual engine, its next request may have
>>> +      * been blocked waiting for access to the active context. We have
>>> +      * to kick all the siblings again in case we need to switch (e.g.
>>> +      * the next request is not runnable on this engine). Hopefully,
>>> +      * we will already have submitted the next request before the
>>> +      * tasklet runs and do not need to rebuild each virtual tree
>>> +      * and kick everyone again.
>>> +      */
>>> +     if (rq->engine != rq->hw_context->engine)
>>> +             tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
>>
>> Is this needed only for non-default execution_mask? If so it would be
>> good to limit it to avoid tasklet storm with plain veng.
> 
> The issue is not just with this rq but the next one. If that has a
> restricted mask that prevents it running on this engine, we may have
> missed the opportunity to queue it (and so never run it under just the
> right circumstances).
> 
> Something like
> 	to_virtual_engine(rq->hw_context->engine)->request->execution_mask & ~rq->execution_mask
> 
> The storm isn't quite so bad, it's only on context-out, and we often do
> succeed in keeping it busy. I was just trying to avoid pulling in ve here.

What do you mean by the "pulling in ve" bit? Avoiding using 
to_virtual_engine like in the line you wrote above?

> 
>>>    static u64 execlists_update_context(struct i915_request *rq)
>>> @@ -779,6 +791,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
>>>    {
>>>        const struct intel_engine_cs *active;
>>>    
>>> +     if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
>>> +             return false;
>>> +
>>>        /*
>>>         * We track when the HW has completed saving the context image
>>>         * (i.e. when we have seen the final CS event switching out of
>>> @@ -3139,12 +3154,44 @@ static const struct intel_context_ops virtual_context_ops = {
>>>        .destroy = virtual_context_destroy,
>>>    };
>>>    
>>> +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
>>> +{
>>> +     struct i915_request *rq;
>>> +     intel_engine_mask_t mask;
>>
>> intel_engine_mask_t throughout is the wrong type for this, even if we
>> disallowed duplicate siblings, and even more so if we don't.
> 
> Why? It's a mask of engine->mask (and has to be of the physical id to
> allow it to be position invariant in siblings[]).
>   
>> Either way it seems like the 64 sibling limit needs to be back. Or maybe
>> only in the bonding case?
> 
> ?

My bad. I mistakenly thought execution_mask relates to position of 
engines in the siblings array.

>>
>>> +
>>> +     rq = READ_ONCE(ve->request);
>>> +     if (!rq)
>>> +             return 0;
>>> +
>>> +     /* The rq is ready for submission; rq->execution_mask is now stable. */
>>> +     mask = rq->execution_mask;
>>> +     if (unlikely(!mask)) {
>>> +             /* Invalid selection, submit to a random engine in error */
>>> +             i915_request_skip(rq, -ENODEV);
>>
>> When can this happen? It looks like if it can happen we should reject it
>> earlier. Or if it can't then just assert.
> 
> Many submit fences can end up with an intersection of 0. This is the
> convenient point to do the rejection, as with any other asynchronous
> error.

Which ones are many? Why would we have uAPI which allows setting 
impossible things where all requests will fail with -ENODEV?

> 
>>> +             mask = ve->siblings[0]->mask;
>>> +     }
>>> +
>>> +     GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
>>> +               ve->base.name,
>>> +               rq->fence.context, rq->fence.seqno,
>>> +               mask, ve->base.execlists.queue_priority_hint);
>>> +
>>> +     return mask;
>>> +}
>>> +
>>>    static void virtual_submission_tasklet(unsigned long data)
>>>    {
>>>        struct virtual_engine * const ve = (struct virtual_engine *)data;
>>>        const int prio = ve->base.execlists.queue_priority_hint;
>>> +     intel_engine_mask_t mask;
>>>        unsigned int n;
>>>    
>>> +     rcu_read_lock();
>>> +     mask = virtual_submission_mask(ve);
>>> +     rcu_read_unlock();
>>
>> What is the RCU for?
> 
> Accessing ve->request. There's nothing stopping another engine from
> spotting the ve->request still in its tree, submitting it and it being
> retired all during the read here.

AFAIU there can only be one instance of virtual_submission_tasklet per 
VE at a time and the code above is before the request is inserted into 
physical engine trees. So I don't get it.

Hm.. but going back to the veng patch, there is a 
GEM_BUG_ON(ve->request) in virtual_submit_request. Why couldn't this be 
called multiple times in parallel for different requests?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17 12:35       ` Tvrtko Ursulin
@ 2019-04-17 12:46         ` Chris Wilson
  2019-04-17 13:32           ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-17 12:46 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-17 13:35:29)
> 
> On 17/04/2019 12:57, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-04-17 12:43:49)
> >>
> >> On 17/04/2019 08:56, Chris Wilson wrote:
> >>> Allow the user to direct which physical engines of the virtual engine
> >>> they wish to execute on, as sometimes it is necessary to override the
> >>> load balancing algorithm.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
> >>>    drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
> >>>    drivers/gpu/drm/i915/i915_request.c    |   1 +
> >>>    drivers/gpu/drm/i915/i915_request.h    |   3 +
> >>>    4 files changed, 193 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> index d6efd6aa67cb..560a18bb4cbb 100644
> >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> @@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
> >>>        intel_engine_context_out(rq->engine);
> >>>        execlists_context_status_change(rq, status);
> >>>        trace_i915_request_out(rq);
> >>> +
> >>> +     /*
> >>> +      * If this is part of a virtual engine, its next request may have
> >>> +      * been blocked waiting for access to the active context. We have
> >>> +      * to kick all the siblings again in case we need to switch (e.g.
> >>> +      * the next request is not runnable on this engine). Hopefully,
> >>> +      * we will already have submitted the next request before the
> >>> +      * tasklet runs and do not need to rebuild each virtual tree
> >>> +      * and kick everyone again.
> >>> +      */
> >>> +     if (rq->engine != rq->hw_context->engine)
> >>> +             tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
> >>
> >> Is this needed only for non-default execution_mask? If so it would be
> >> good to limit it to avoid tasklet storm with plain veng.
> > 
> > The issue is not just with this rq but the next one. If that has a
> > restricted mask that prevents it running on this engine, we may have
> > missed the opportunity to queue it (and so never run it under just the
> > right circumstances).
> > 
> > Something like
> >       to_virtual_engine(rq->hw_context->engine)->request->execution_mask & ~rq->execution_mask
> > 
> > The storm isn't quite so bad, it's only on context-out, and we often do
> > succeed in keeping it busy. I was just trying to avoid pulling in ve here.
> 
> What do you mean by the "pulling in ve" bit? Avoiding using 
> to_virtual_engine like in the line you wrote above?

Just laziness hiding behind an excuse of trying not to smear veng too
widely.

> >>> +
> >>> +     rq = READ_ONCE(ve->request);
> >>> +     if (!rq)
> >>> +             return 0;
> >>> +
> >>> +     /* The rq is ready for submission; rq->execution_mask is now stable. */
> >>> +     mask = rq->execution_mask;
> >>> +     if (unlikely(!mask)) {
> >>> +             /* Invalid selection, submit to a random engine in error */
> >>> +             i915_request_skip(rq, -ENODEV);
> >>
> >> When can this happen? It looks like if it can happen we should reject it
> >> earlier. Or if it can't then just assert.
> > 
> > Many submit fences can end up with an intersection of 0. This is the
> > convenient point to do the rejection, as with any other asynchronous
> > error.
> 
> Which ones are many? Why would we have uAPI which allows setting 
> impossible things where all requests will fail with -ENODEV?

But we are rejecting them in the uAPI, right here. This is the earliest
point where all the information for a particular execbuf is available
and we have the means of reporting that back.

> >>> +             mask = ve->siblings[0]->mask;
> >>> +     }
> >>> +
> >>> +     GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> >>> +               ve->base.name,
> >>> +               rq->fence.context, rq->fence.seqno,
> >>> +               mask, ve->base.execlists.queue_priority_hint);
> >>> +
> >>> +     return mask;
> >>> +}
> >>> +
> >>>    static void virtual_submission_tasklet(unsigned long data)
> >>>    {
> >>>        struct virtual_engine * const ve = (struct virtual_engine *)data;
> >>>        const int prio = ve->base.execlists.queue_priority_hint;
> >>> +     intel_engine_mask_t mask;
> >>>        unsigned int n;
> >>>    
> >>> +     rcu_read_lock();
> >>> +     mask = virtual_submission_mask(ve);
> >>> +     rcu_read_unlock();
> >>
> >> What is the RCU for?
> > 
> > Accessing ve->request. There's nothing stopping another engine from
> > spotting the ve->request still in its tree, submitting it and it being
> > retired all during the read here.
> 
> AFAIU there can only be one instance of virtual_submission_tasklet per 
> VE at a time and the code above is before the request is inserted into 
> physical engine trees. So I don't get it.

But the veng is being utilized by real engines concurrently; they are
the ones who take the ve->request and execute it, and so may free the ve->request
behind the submission tasklet's back. Later on the spinlock comes into
play after we have decided there's a request ready.

> Hm.. but going back to the veng patch, there is a 
> GEM_BUG_ON(ve->request) in virtual_submit_request. Why couldn't this be 
> called multiple times in parallel for different requests?

Because we strictly ordered submission into the veng so that it only
considers one ready request at a time. Processing more requests
decreased global throughput as load-balancing is no longer "late" (the
physical engines then amalgamate all the ve requests into one submit).
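
Reduced to a sketch, the single-slot handover that enforces that ordering
(simplified from the veng patch):

        static void virtual_submit_request(struct i915_request *rq)
        {
                struct virtual_engine *ve = to_virtual_engine(rq->engine);

                /* only one ready request at a time: the slot is empty */
                GEM_BUG_ON(ve->request);
                WRITE_ONCE(ve->request, rq);

                /* rebuild each sibling's tree and let them race for it */
                tasklet_schedule(&ve->base.execlists.tasklet);
        }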
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17 12:46         ` Chris Wilson
@ 2019-04-17 13:32           ` Tvrtko Ursulin
  2019-04-18  7:24             ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-17 13:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 13:46, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-17 13:35:29)
>>
>> On 17/04/2019 12:57, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-04-17 12:43:49)
>>>>
>>>> On 17/04/2019 08:56, Chris Wilson wrote:
>>>>> Allow the user to direct which physical engines of the virtual engine
>>>>> they wish to execute on, as sometimes it is necessary to override the
>>>>> load balancing algorithm.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
>>>>>     drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
>>>>>     drivers/gpu/drm/i915/i915_request.c    |   1 +
>>>>>     drivers/gpu/drm/i915/i915_request.h    |   3 +
>>>>>     4 files changed, 193 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>>> index d6efd6aa67cb..560a18bb4cbb 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>>> @@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
>>>>>         intel_engine_context_out(rq->engine);
>>>>>         execlists_context_status_change(rq, status);
>>>>>         trace_i915_request_out(rq);
>>>>> +
>>>>> +     /*
>>>>> +      * If this is part of a virtual engine, its next request may have
>>>>> +      * been blocked waiting for access to the active context. We have
>>>>> +      * to kick all the siblings again in case we need to switch (e.g.
>>>>> +      * the next request is not runnable on this engine). Hopefully,
>>>>> +      * we will already have submitted the next request before the
>>>>> +      * tasklet runs and do not need to rebuild each virtual tree
>>>>> +      * and kick everyone again.
>>>>> +      */
>>>>> +     if (rq->engine != rq->hw_context->engine)
>>>>> +             tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
>>>>
>>>> Is this needed only for non-default execution_mask? If so it would be
>>>> good to limit it to avoid tasklet storm with plain veng.
>>>
>>> The issue is not just with this rq but the next one. If that has a
>>> restricted mask that prevents it running on this engine, we may have
>>> missed the opportunity to queue it (and so never run it under just the
>>> right circumstances).
>>>
>>> Something like
>>>        to_virtual_engine(rq->hw_context->engine)->request->execution_mask & ~rq->execution_mask
>>>
>>> The storm isn't quite so bad, it's only on context-out, and we often do
>>> succeed in keeping it busy. I was just trying to avoid pulling in ve here.
>>
>> What do you mean by the "pulling in ve" bit? Avoiding using
>> to_virtual_engine like in the line you wrote above?
> 
> Just laziness hiding behind an excuse of trying not to smear veng too
> widely.
> 
>>>>> +
>>>>> +     rq = READ_ONCE(ve->request);
>>>>> +     if (!rq)
>>>>> +             return 0;
>>>>> +
>>>>> +     /* The rq is ready for submission; rq->execution_mask is now stable. */
>>>>> +     mask = rq->execution_mask;
>>>>> +     if (unlikely(!mask)) {
>>>>> +             /* Invalid selection, submit to a random engine in error */
>>>>> +             i915_request_skip(rq, -ENODEV);
>>>>
>>>> When can this happen? It looks like if it can happen we should reject it
>>>> earlier. Or if it can't then just assert.
>>>
>>> Many submit fences can end up with an intersection of 0. This is the
>>> convenient point to do the rejection, as with any other asynchronous
>>> error.
>>
>> Which ones are many? Why would we have uAPI which allows setting
>> impossible things where all requests will fail with -ENODEV?
> 
> But we are rejecting them in the uAPI, right here. This is the earliest
> point where all the information for a particular execbuf is available
> and we have the means of reporting that back.

In the tasklet? I could be just extra slow today, but please could you 
explain how we allowed a submission which can't be rejected earlier than 
in the tasklet. What sequence of events leads to it?

> 
>>>>> +             mask = ve->siblings[0]->mask;
>>>>> +     }
>>>>> +
>>>>> +     GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
>>>>> +               ve->base.name,
>>>>> +               rq->fence.context, rq->fence.seqno,
>>>>> +               mask, ve->base.execlists.queue_priority_hint);
>>>>> +
>>>>> +     return mask;
>>>>> +}
>>>>> +
>>>>>     static void virtual_submission_tasklet(unsigned long data)
>>>>>     {
>>>>>         struct virtual_engine * const ve = (struct virtual_engine *)data;
>>>>>         const int prio = ve->base.execlists.queue_priority_hint;
>>>>> +     intel_engine_mask_t mask;
>>>>>         unsigned int n;
>>>>>     
>>>>> +     rcu_read_lock();
>>>>> +     mask = virtual_submission_mask(ve);
>>>>> +     rcu_read_unlock();
>>>>
>>>> What is the RCU for?
>>>
>>> Accessing ve->request. There's nothing stopping another engine from
>>> spotting the ve->request still in its tree, submitting it and it being
>>> retired all during the read here.
>>
>> AFAIU there can only be one instance of virtual_submission_tasklet per
>> VE at a time and the code above is before the request is inserted into
>> physical engine trees. So I don't get it.
> 
> But the veng is being utilized by real engines concurrently, they are
> who take the ve->request and execute it and so may free the ve->request
> behind the submission tasklet's back. Later on the spinlock comes into
> play after we have decided there's a request ready.

How can real engines see this request at this point since it hasn't been 
put in the queue yet?

And if it is protecting against the tasklet then it should be 
local_bh_disable/enable. But wait.. it is a tasklet already so that also 
doesn't make sense.

So I just don't see it.

I guess it is related to the question of the zero intersected mask. If that 
were impossible you would be able to fetch the mask from inside the 
locked section in the hunk one down.

> 
>> Hm.. but going back to the veng patch, there is a
>> GEM_BUG_ON(ve->request) in virtual_submit_request. Why couldn't this be
>> called multiple times in parallel for different requests?
> 
> Because we strictly ordered submission into the veng so that it only
> considers one ready request at a time. Processing more requests
> decreased global throughput as load-balancing is no longer "late" (the
> physical engines then amalgamate all the ve requests into one submit).

I got temporarily confused into thinking submit_notify is at the 
queued->runnable transition. You see what you are dealing with here. :I

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 28/32] drm/i915: Load balancing across a virtual engine
  2019-04-17 11:26   ` Tvrtko Ursulin
@ 2019-04-17 13:51     ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-17 13:51 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-17 12:26:04)
> 
> On 17/04/2019 08:56, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > index e19f84b006cc..f900f0680647 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > @@ -290,8 +290,12 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
> >                               break;
> >               }
> >               list_add(&rq->signal_link, pos);
> > -             if (pos == &ce->signals) /* catch transitions from empty list */
> > +             if (pos == &ce->signals) { /* catch transitions from empty */
> >                       list_move_tail(&ce->signal_link, &b->signalers);
> > +             } else if (ce->engine != rq->engine) { /* virtualised */
> > +                     list_move_tail(&ce->signal_link, &b->signalers);
> > +                     intel_engine_queue_breadcrumbs(rq->engine);
> 
> Is there significance in check not being based on engine->flags & VIRTUAL?

Imo, it feels like a more generalised statement that we are transferring this
request onto a new backend -- I wanted to say something like
rq->engine != last_breadcrumb_engine (and I think this is as close as we
can get and a very good simulacrum).

> Actually, the signaling can get enabled either on the virtual or real 
> engine, depending on timing. I don't see that irq_enable/disable vfuncs 
> will be present on the veng though. So how does that work?

rq->engine is always a real engine here, as we are under the rq->lock
and have the I915_FENCE_FLAG_ACTIVE bit set.

> Maybe there is a clue in presence of intel_engine_queue_breadcrumbs but 
> I don't see it.
> 
> > +     for (rb = rb_first_cached(&execlists->virtual); rb; ) {
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +             struct i915_request *rq = READ_ONCE(ve->request);
> > +
> > +             if (!rq) { /* lazily cleanup after another engine handled rq */
> > +                     rb_erase_cached(rb, &execlists->virtual);
> > +                     RB_CLEAR_NODE(rb);
> > +                     rb = rb_first_cached(&execlists->virtual);
> > +                     continue;
> > +             }
> > +
> > +             if (!virtual_matches(ve, rq, engine)) {
> > +                     rb = rb_next(rb);
> > +                     continue;
> > +             }
> 
> Can this create a problem where, since the queue is supposed to be in 
> submission order, here it skips some requests if the owning veng is 
> already executing something and so inverts the dependency chain?
> 
> Before semaphore I guess it wasn't possible since only runnable requests 
> would be in the queue. But with semaphores I am not sure. Paranoid thing 
> would be to not dequeue any other veng request if engine is busy with 
> some veng. But maybe that would serialize things too much. Or it is too 
> paranoid?

Even with semaphores, the other request cannot be put into the tree
until its signaling request has started. If preempted, we should be
adjusting the priorities such that the preempted request is put back
before its children.

> > +     while (rb) { /* XXX virtual is always taking precedence */
> > +             struct virtual_engine *ve =
> > +                     rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > +             struct i915_request *rq;
> > +
> > +             spin_lock(&ve->base.timeline.lock);
> > +
> > +             rq = ve->request;
> > +             if (unlikely(!rq)) { /* lost the race to a sibling */
> > +                     spin_unlock(&ve->base.timeline.lock);
> > +                     rb_erase_cached(rb, &execlists->virtual);
> > +                     RB_CLEAR_NODE(rb);
> > +                     rb = rb_first_cached(&execlists->virtual);
> > +                     continue;
> > +             }
> > +
> > +             GEM_BUG_ON(rq != ve->request);
> > +             GEM_BUG_ON(rq->engine != &ve->base);
> > +             GEM_BUG_ON(rq->hw_context != &ve->context);
> > +
> > +             if (rq_prio(rq) >= queue_prio(execlists)) {
> > +                     if (!virtual_matches(ve, rq, engine)) {
> > +                             spin_unlock(&ve->base.timeline.lock);
> > +                             rb = rb_next(rb);
> > +                             continue;
> 
> Is this needed? The first virtual_matches loop skipped all requests 
> which shouldn't be dequeued and leaves rb pointing to the top priority 
> one which can.

Now we have the lock, and so we need to check that we actually own the
ve->request. If we decide the first wasn't good enough, but the second
is still a better choice than the normal queue, we need to confirm that
we can submit it on this engine.
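
The shape is the usual optimistic-peek pattern (a sketch, not the literal
hunk above):

        struct i915_request *rq = READ_ONCE(ve->request);

        /* lockless peek for a plausible candidate... */
        if (!rq || !virtual_matches(ve, rq, engine))
                continue;

        /* ...then take the lock and revalidate before claiming it */
        spin_lock(&ve->base.timeline.lock);
        rq = ve->request;
        if (rq && virtual_matches(ve, rq, engine)) {
                /* we own it: submit on this engine */
        }
        spin_unlock(&ve->base.timeline.lock);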

> > +static void virtual_context_destroy(struct kref *kref)
> > +{
> > +     struct virtual_engine *ve =
> > +             container_of(kref, typeof(*ve), context.ref);
> > +     unsigned int n;
> > +
> > +     GEM_BUG_ON(ve->request);
> > +     GEM_BUG_ON(ve->context.active);
> > +
> > +     for (n = 0; n < ve->num_siblings; n++) {
> > +             struct intel_engine_cs *sibling = ve->siblings[n];
> > +             struct rb_node *node = &ve->nodes[sibling->id].rb;
> > +
> > +             if (RB_EMPTY_NODE(node))
> > +                     continue;
> > +
> > +             spin_lock_irq(&sibling->timeline.lock);
> > +
> > +             if (!RB_EMPTY_NODE(node))
> > +                     rb_erase_cached(node, &sibling->execlists.virtual);
> 
> A consequence of rq duplication across physical engines? Leave a comment?

A consequence of the asynchronous cleanup inside the execlists_dequeue.

> > +/*
> > + * i915_context_engines_load_balance:
> > + *
> > + * Enable load balancing across this set of engines.
> > + *
> > + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
> > + * used will proxy the execbuffer request onto one of the set of engines
> > + * in such a way as to distribute the load evenly across the set.
> > + *
> > + * The set of engines must be compatible (e.g. the same HW class) as they
> > + * will share the same logical GPU context and ring.
> > + *
> > + * To intermix rendering with the virtual engine and direct rendering onto
> > + * the backing engines (bypassing the load balancing proxy), the context must
> > + * be defined to use a single timeline for all engines.
> > + */
> > +struct i915_context_engines_load_balance {
> > +     struct i915_user_extension base;
> > +
> > +     __u16 engine_index;
> > +     __u16 num_siblings;
> > +     __u32 flags; /* all undefined flags must be zero */
> > +
> > +     __u64 mbz64[1]; /* reserved for future use; must be zero */
> 
> Why an array if only one? Should we add a few more just in case?

Hopefully that's sarcasm. :-p
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-17  7:56 ` [PATCH 31/32] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-04-18  6:47   ` Tvrtko Ursulin
  2019-04-18  6:57     ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-18  6:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Some users require that when a master batch is executed on one particular
> engine, a companion batch is run simultaneously on a specific slave
> engine. For this purpose, we introduce virtual engine bonding, allowing
> maps of master:slaves to be constructed to constrain which physical
> engines a virtual engine may select given a fence on a master engine.
> 
> For the moment, we continue to ignore the issue of preemption deferring
> the master request for later. Ideally, we would like to then also remove
> the slave and run something else rather than have it stall the pipeline.
> With load balancing, we should be able to move workload around it, but
> there is a similar stall on the master pipeline while it may wait for
> the slave to be executed. At the cost of more latency for the bonded
> request, it may be interesting to launch both on their engines in
> lockstep. (Bubbles abound.)
> 
> Opens: Also what about bonding an engine as its own master? It doesn't
> break anything internally, so allow the silliness.
> 
> v2: Emancipate the bonds
> v3: Couple in delayed scheduling for the selftests
> v4: Handle invalid mutually exclusive bonding
> v5: Mention what the uapi does
> v6: s/nbond/num_bonds/
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  98 +++++++++
>   drivers/gpu/drm/i915/gt/intel_lrc.h           |   4 +
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        | 191 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.c       |  86 ++++++++
>   drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
>   include/uapi/drm/i915_drm.h                   |  35 ++++
>   7 files changed, 424 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 6dceb78e95d7..18b9175835c7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -405,6 +405,13 @@ struct intel_engine_cs {
>   	 */
>   	void		(*submit_request)(struct i915_request *rq);
>   
> +	/*
> +	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
> +	 * request down to the bonded pairs.
> +	 */
> +	void            (*bond_execute)(struct i915_request *rq,
> +					struct dma_fence *signal);
> +
>   	/*
>   	 * Call when the priority on a request has changed and it and its
>   	 * dependencies may need rescheduling. Note the request itself may
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 560a18bb4cbb..1b5b0937be25 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -191,6 +191,18 @@ struct virtual_engine {
>   		int prio;
>   	} nodes[I915_NUM_ENGINES];
>   
> +	/*
> +	 * Keep track of bonded pairs -- restrictions upon our selection
> +	 * of physical engines any particular request may be submitted to.
> +	 * If we receive a submit-fence from a master engine, we will only
> +	 * use one of sibling_mask physical engines.
> +	 */
> +	struct ve_bond {
> +		const struct intel_engine_cs *master;
> +		intel_engine_mask_t sibling_mask;
> +	} *bonds;
> +	unsigned int num_bonds;
> +
>   	/* And finally, which physical engines this virtual engine maps onto. */
>   	unsigned int num_siblings;
>   	struct intel_engine_cs *siblings[0];
> @@ -969,6 +981,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   			rb_erase_cached(rb, &execlists->virtual);
>   			RB_CLEAR_NODE(rb);
>   
> +			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
>   			rq->engine = engine;
>   
>   			if (engine != ve->siblings[0]) {
> @@ -3082,6 +3095,8 @@ static void virtual_context_destroy(struct kref *kref)
>   	if (ve->context.state)
>   		__execlists_context_fini(&ve->context);
>   
> +	kfree(ve->bonds);
> +
>   	i915_timeline_fini(&ve->base.timeline);
>   	kfree(ve);
>   }
> @@ -3277,6 +3292,38 @@ static void virtual_submit_request(struct i915_request *rq)
>   	tasklet_schedule(&ve->base.execlists.tasklet);
>   }
>   
> +static struct ve_bond *
> +virtual_find_bond(struct virtual_engine *ve,
> +		  const struct intel_engine_cs *master)
> +{
> +	int i;
> +
> +	for (i = 0; i < ve->num_bonds; i++) {
> +		if (ve->bonds[i].master == master)
> +			return &ve->bonds[i];
> +	}
> +
> +	return NULL;
> +}
> +
> +static void
> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +	struct ve_bond *bond;
> +
> +	bond = virtual_find_bond(ve, to_request(signal)->engine);
> +	if (bond) {
> +		intel_engine_mask_t old, new, cmp;
> +
> +		cmp = READ_ONCE(rq->execution_mask);
> +		do {
> +			old = cmp;
> +			new = cmp & bond->sibling_mask;
> +		} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);

Loop implies someone else might be modifying the rq->execution_mask in 
parallel?

> +	}
> +}
> +
>   struct intel_context *
>   intel_execlists_create_virtual(struct i915_gem_context *ctx,
>   			       struct intel_engine_cs **siblings,
> @@ -3315,6 +3362,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>   
>   	ve->base.schedule = i915_schedule;
>   	ve->base.submit_request = virtual_submit_request;
> +	ve->base.bond_execute = virtual_bond_execute;
>   
>   	ve->base.execlists.queue_priority_hint = INT_MIN;
>   	tasklet_init(&ve->base.execlists.tasklet,
> @@ -3404,9 +3452,59 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
>   	if (IS_ERR(dst))
>   		return dst;
>   
> +	if (se->num_bonds) {
> +		struct virtual_engine *de = to_virtual_engine(dst->engine);
> +
> +		de->bonds = kmemdup(se->bonds,
> +				    sizeof(*se->bonds) * se->num_bonds,
> +				    GFP_KERNEL);
> +		if (!de->bonds) {
> +			intel_context_put(dst);
> +			return ERR_PTR(-ENOMEM);
> +		}
> +
> +		de->num_bonds = se->num_bonds;
> +	}
> +
>   	return dst;
>   }
>   
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> +				     const struct intel_engine_cs *master,
> +				     const struct intel_engine_cs *sibling)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(engine);
> +	struct ve_bond *bond;
> +	int n;
> +
> +	/* Sanity check the sibling is part of the virtual engine */
> +	for (n = 0; n < ve->num_siblings; n++)
> +		if (sibling == ve->siblings[n])
> +			break;
> +	if (n == ve->num_siblings)
> +		return -EINVAL;
> +
> +	bond = virtual_find_bond(ve, master);
> +	if (bond) {
> +		bond->sibling_mask |= sibling->mask;
> +		return 0;
> +	}
> +
> +	bond = krealloc(ve->bonds,
> +			sizeof(*bond) * (ve->num_bonds + 1),
> +			GFP_KERNEL);
> +	if (!bond)
> +		return -ENOMEM;
> +
> +	bond[ve->num_bonds].master = master;
> +	bond[ve->num_bonds].sibling_mask = sibling->mask;
> +
> +	ve->bonds = bond;
> +	ve->num_bonds++;
> +
> +	return 0;
> +}
> +
>   void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   				   struct drm_printer *m,
>   				   void (*show_request)(struct drm_printer *m,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index 5530606052e5..e029aee87adf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -123,4 +123,8 @@ struct intel_context *
>   intel_execlists_clone_virtual(struct i915_gem_context *ctx,
>   			      struct intel_engine_cs *src);
>   
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> +				     const struct intel_engine_cs *master,
> +				     const struct intel_engine_cs *sibling);
> +
>   #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 209e51ef13e6..3f456a8b727b 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -13,6 +13,7 @@
>   #include "selftests/igt_gem_utils.h"
>   #include "selftests/igt_live_test.h"
>   #include "selftests/igt_spinner.h"
> +#include "selftests/lib_sw_fence.h"
>   #include "selftests/mock_context.h"
>   
>   static int live_sanitycheck(void *arg)
> @@ -1610,6 +1611,195 @@ static int live_virtual_mask(void *arg)
>   	return err;
>   }
>   
> +static int bond_virtual_engine(struct drm_i915_private *i915,
> +			       unsigned int class,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int nsibling,
> +			       unsigned int flags)
> +#define BOND_SCHEDULE BIT(0)
> +{
> +	struct intel_engine_cs *master;
> +	struct i915_gem_context *ctx;
> +	struct i915_request *rq[16];
> +	enum intel_engine_id id;
> +	unsigned long n;
> +	int err;
> +
> +	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
> +
> +	ctx = kernel_context(i915);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	err = 0;
> +	rq[0] = ERR_PTR(-ENOMEM);
> +	for_each_engine(master, i915, id) {
> +		struct i915_sw_fence fence = {};
> +
> +		if (master->class == class)
> +			continue;
> +
> +		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
> +
> +		rq[0] = igt_request_alloc(ctx, master);
> +		if (IS_ERR(rq[0])) {
> +			err = PTR_ERR(rq[0]);
> +			goto out;
> +		}
> +		i915_request_get(rq[0]);
> +
> +		if (flags & BOND_SCHEDULE) {
> +			onstack_fence_init(&fence);
> +			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
> +							       &fence,
> +							       GFP_KERNEL);
> +		}
> +		i915_request_add(rq[0]);
> +		if (err < 0)
> +			goto out;
> +
> +		for (n = 0; n < nsibling; n++) {
> +			struct intel_context *ve;
> +
> +			ve = intel_execlists_create_virtual(ctx,
> +							    siblings,
> +							    nsibling);
> +			if (IS_ERR(ve)) {
> +				err = PTR_ERR(ve);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			err = intel_virtual_engine_attach_bond(ve->engine,
> +							       master,
> +							       siblings[n]);
> +			if (err) {
> +				intel_context_put(ve);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			err = intel_context_pin(ve);
> +			intel_context_put(ve);
> +			if (err) {
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			rq[n + 1] = i915_request_create(ve);
> +			intel_context_unpin(ve);
> +			if (IS_ERR(rq[n + 1])) {
> +				err = PTR_ERR(rq[n + 1]);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +			i915_request_get(rq[n + 1]);
> +
> +			err = i915_request_await_execution(rq[n + 1],
> +							   &rq[0]->fence,
> +							   ve->engine->bond_execute);
> +			i915_request_add(rq[n + 1]);
> +			if (err < 0) {
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +		}
> +		onstack_fence_fini(&fence);
> +
> +		if (i915_request_wait(rq[0],
> +				      I915_WAIT_LOCKED,
> +				      HZ / 10) < 0) {
> +			pr_err("Master request did not execute (on %s)!\n",
> +			       rq[0]->engine->name);
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		for (n = 0; n < nsibling; n++) {
> +			if (i915_request_wait(rq[n + 1],
> +					      I915_WAIT_LOCKED,
> +					      MAX_SCHEDULE_TIMEOUT) < 0) {
> +				err = -EIO;
> +				goto out;
> +			}
> +
> +			if (rq[n + 1]->engine != siblings[n]) {
> +				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
> +				       siblings[n]->name,
> +				       rq[n + 1]->engine->name,
> +				       rq[0]->engine->name);
> +				err = -EINVAL;
> +				goto out;
> +			}
> +		}
> +
> +		for (n = 0; !IS_ERR(rq[n]); n++)
> +			i915_request_put(rq[n]);
> +		rq[0] = ERR_PTR(-ENOMEM);
> +	}
> +
> +out:
> +	for (n = 0; !IS_ERR(rq[n]); n++)
> +		i915_request_put(rq[n]);
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	kernel_context_close(ctx);
> +	return err;
> +}
> +
> +static int live_virtual_bond(void *arg)
> +{
> +	static const struct phase {
> +		const char *name;
> +		unsigned int flags;
> +	} phases[] = {
> +		{ "", 0 },
> +		{ "schedule", BOND_SCHEDULE },
> +		{ },
> +	};
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +	int err = 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		const struct phase *p;
> +		int nsibling;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		for (p = phases; p->name; p++) {
> +			err = bond_virtual_engine(i915,
> +						  class, siblings, nsibling,
> +						  p->flags);
> +			if (err) {
> +				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
> +				       __func__, p->name, class, nsibling, err);
> +				goto out_unlock;
> +			}
> +		}
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1624,6 +1814,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt_smoke),
>   		SUBTEST(live_virtual_engine),
>   		SUBTEST(live_virtual_mask),
> +		SUBTEST(live_virtual_bond),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 57b09f624bb4..7418a2742f0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1491,8 +1491,94 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
>   	return err;
>   }
>   
> +static int
> +set_engines__bond(struct i915_user_extension __user *base, void *data)
> +{
> +	struct i915_context_engines_bond __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_engines *set = data;
> +	struct intel_engine_cs *virtual;
> +	struct intel_engine_cs *master;
> +	u16 class, instance;
> +	u16 idx, num_bonds;
> +	int err, n;
> +
> +	if (get_user(idx, &ext->virtual_index))
> +		return -EFAULT;
> +
> +	if (idx >= set->engines->num_engines) {
> +		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
> +			  idx, set->engines->num_engines);
> +		return -EINVAL;
> +	}
> +
> +	idx = array_index_nospec(idx, set->engines->num_engines);
> +	if (!set->engines->engines[idx]) {
> +		DRM_DEBUG("Invalid engine at %d\n", idx);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * A non-virtual engine has 0 siblings to choose between; and submit
> +	 * fence will always be directed to the one engine.
> +	 */
> +	virtual = set->engines->engines[idx]->engine;
> +	if (!intel_engine_is_virtual(virtual))
> +		return 0;

Hmm wouldn't we strictly speaking need to distinguish between uAPI 
errors and auto-magic-single-veng-replacement? Latter is OK to return 
success, but former should be reported as -EINVAL I think.

> +
> +	err = check_user_mbz(&ext->flags);
> +	if (err)
> +		return err;
> +
> +	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> +		err = check_user_mbz(&ext->mbz64[n]);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (get_user(class, &ext->master_class))
> +		return -EFAULT;
> +
> +	if (get_user(instance, &ext->master_instance))
> +		return -EFAULT;
> +
> +	master = intel_engine_lookup_user(set->ctx->i915, class, instance);
> +	if (!master) {
> +		DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
> +			  class, instance);
> +		return -EINVAL;
> +	}
> +
> +	if (get_user(num_bonds, &ext->num_bonds))
> +		return -EFAULT;

Should num_bonds > virtual->num_siblings be an error?

> +
> +	for (n = 0; n < num_bonds; n++) {
> +		struct intel_engine_cs *bond;
> +		struct i915_engine_class_instance ci;
> +
> +		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
> +			return -EFAULT;
> +
> +		bond = intel_engine_lookup_user(set->ctx->i915,
> +						ci.engine_class,
> +						ci.engine_instance);
> +		if (!bond) {
> +			DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
> +				  n, ci.engine_class, ci.engine_instance);
> +			return -EINVAL;
> +		}
> +
> +		err = intel_virtual_engine_attach_bond(virtual, master, bond);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
>   	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
> +	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
>   };
>   
>   static int
> diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> index 2bfa72c1654b..b976c12817c5 100644
> --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> @@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
>   
>   void onstack_fence_fini(struct i915_sw_fence *fence)
>   {
> +	if (!fence->flags)
> +		return;
> +
>   	i915_sw_fence_commit(fence);
>   	i915_sw_fence_fini(fence);
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ff2ababc0984..091872d24588 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1543,6 +1543,10 @@ struct drm_i915_gem_context_param {
>    * sized argument, will revert back to default settings.
>    *
>    * See struct i915_context_param_engines.
> + *
> + * Extensions:
> + *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
> + *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
>    */
>   #define I915_CONTEXT_PARAM_ENGINES	0xa
>   /* Must be kept compact -- no holes and well documented */
> @@ -1645,9 +1649,40 @@ struct i915_context_engines_load_balance {
>   	struct i915_engine_class_instance engines[N__]; \
>   } __attribute__((packed)) name__
>   
> +/*
> + * i915_context_engines_bond:
> + *
> + * Constructed bonded pairs for execution within a virtual engine.
> + *
> + * All engines are equal, but some are more equal than others. Given
> + * the distribution of resources in the HW, it may be preferable to run
> + * a request on a given subset of engines in parallel to a request on a
> + * specific engine. We enable this selection of engines within a virtual
> + * engine by specifying bonding pairs, for any given master engine we will
> + * only execute on one of the corresponding siblings within the virtual engine.
> + *
> + * To execute a request in parallel on the master engine and a sibling requires
> + * coordination with a I915_EXEC_FENCE_SUBMIT.
> + */
> +struct i915_context_engines_bond {
> +	struct i915_user_extension base;
> +
> +	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
> +	__u16 num_bonds;
> +
> +	__u16 master_class;
> +	__u16 master_instance;

struct i915_engine_class_instance master; ?

> +
> +	__u64 flags; /* all undefined flags must be zero */
> +	__u64 mbz64[4]; /* reserved for future use; must be zero */
> +
> +	struct i915_engine_class_instance engines[0];
> +} __attribute__((packed));
> +
>   struct i915_context_param_engines {
>   	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
>   #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
> +#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
>   	struct i915_engine_class_instance engines[0];
>   } __attribute__((packed));
>   
> 

Regards,

Tvrtko
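
For context, the userspace flow the quoted uapi comment describes would
presumably be as follows (a hedged sketch; eb_master and eb_slave are
assumed to be set up elsewhere, with the slave's execbuf targeting the
bonded virtual engine in ctx->engines[]):

        /* queue the master batch, exporting its fence */
        eb_master.flags |= I915_EXEC_FENCE_OUT;
        drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &eb_master);

        /* queue the slave on the virtual engine, gated on the master's
         * submission (not completion); the bond then narrows the veng to
         * the sibling paired with whichever engine runs the master
         */
        eb_slave.rsvd2 = eb_master.rsvd2 >> 32; /* out-fence -> in-fence */
        eb_slave.flags |= I915_EXEC_FENCE_SUBMIT;
        drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb_slave);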
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-18  6:47   ` Tvrtko Ursulin
@ 2019-04-18  6:57     ` Chris Wilson
  2019-04-18  8:57       ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-18  6:57 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-18 07:47:51)
> 
> On 17/04/2019 08:56, Chris Wilson wrote:
> > +static void
> > +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> > +{
> > +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
> > +     struct ve_bond *bond;
> > +
> > +     bond = virtual_find_bond(ve, to_request(signal)->engine);
> > +     if (bond) {
> > +             intel_engine_mask_t old, new, cmp;
> > +
> > +             cmp = READ_ONCE(rq->execution_mask);
> > +             do {
> > +                     old = cmp;
> > +                     new = cmp & bond->sibling_mask;
> > +             } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
> 
> Loop implies someone else might be modifying the rq->execution_mask in 
> parallel?

There's nothing that prevents there being multiple bonds being
executed simultaneously (other than practicality). There's also nothing
that says this should be the only way to modify rq->execution_mask in
the future.
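
As a standalone sketch of why the retry loop composes (a hypothetical,
driver-free rendition): each writer only ever clears bits, so concurrent
bonds always converge on the intersection, whatever the interleaving:

        static void narrow_mask(unsigned int *mask, unsigned int allowed)
        {
                unsigned int old, new, cmp;

                cmp = READ_ONCE(*mask);
                do {
                        old = cmp;
                        new = old & allowed; /* monotonic: bits only clear */
                } while ((cmp = cmpxchg(mask, old, new)) != old);
        }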

> > +static int
> > +set_engines__bond(struct i915_user_extension __user *base, void *data)
> > +{
> > +     struct i915_context_engines_bond __user *ext =
> > +             container_of_user(base, typeof(*ext), base);
> > +     const struct set_engines *set = data;
> > +     struct intel_engine_cs *virtual;
> > +     struct intel_engine_cs *master;
> > +     u16 class, instance;
> > +     u16 idx, num_bonds;
> > +     int err, n;
> > +
> > +     if (get_user(idx, &ext->virtual_index))
> > +             return -EFAULT;
> > +
> > +     if (idx >= set->engines->num_engines) {
> > +             DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
> > +                       idx, set->engines->num_engines);
> > +             return -EINVAL;
> > +     }
> > +
> > +     idx = array_index_nospec(idx, set->engines->num_engines);
> > +     if (!set->engines->engines[idx]) {
> > +             DRM_DEBUG("Invalid engine at %d\n", idx);
> > +             return -EINVAL;
> > +     }
> > +
> > +     /*
> > +      * A non-virtual engine has 0 siblings to choose between; and submit
> > +      * fence will always be directed to the one engine.
> > +      */
> > +     virtual = set->engines->engines[idx]->engine;
> > +     if (!intel_engine_is_virtual(virtual))
> > +             return 0;
> 
> Hmm wouldn't we strictly speaking need to distinguish between uAPI 
> errors and auto-magic-single-veng-replacement? Latter is OK to return 
> success, but former should be reported as -EINVAL I think.

Is it a uAPI error if it works? :)

> > +
> > +     err = check_user_mbz(&ext->flags);
> > +     if (err)
> > +             return err;
> > +
> > +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> > +             err = check_user_mbz(&ext->mbz64[n]);
> > +             if (err)
> > +                     return err;
> > +     }
> > +
> > +     if (get_user(class, &ext->master_class))
> > +             return -EFAULT;
> > +
> > +     if (get_user(instance, &ext->master_instance))
> > +             return -EFAULT;
> > +
> > +     master = intel_engine_lookup_user(set->ctx->i915, class, instance);
> > +     if (!master) {
> > +             DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
> > +                       class, instance);
> > +             return -EINVAL;
> > +     }
> > +
> > +     if (get_user(num_bonds, &ext->num_bonds))
> > +             return -EFAULT;
> 
> Should num_bonds > virtual->num_siblings be an error?

They could specify the same bond multiple times for whatever reason (and
we probably should allow skipping NONE?); if the target doesn't exist that's
definitely an error.
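
i.e. per the quoted intel_virtual_engine_attach_bond, repeating a master
simply widens that master's sibling mask (engine names hypothetical):

        intel_virtual_engine_attach_bond(ve, rcs0, vcs0); /* mask = vcs0 */
        intel_virtual_engine_attach_bond(ve, rcs0, vcs1); /* mask |= vcs1 */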

> > +/*
> > + * i915_context_engines_bond:
> > + *
> > + * Constructed bonded pairs for execution within a virtual engine.
> > + *
> > + * All engines are equal, but some are more equal than others. Given
> > + * the distribution of resources in the HW, it may be preferable to run
> > + * a request on a given subset of engines in parallel to a request on a
> > + * specific engine. We enable this selection of engines within a virtual
> > + * engine by specifying bonding pairs, for any given master engine we will
> > + * only execute on one of the corresponding siblings within the virtual engine.
> > + *
> > + * To execute a request in parallel on the master engine and a sibling requires
> > + * coordination with a I915_EXEC_FENCE_SUBMIT.
> > + */
> > +struct i915_context_engines_bond {
> > +     struct i915_user_extension base;
> > +
> > +     __u16 virtual_index; /* index of virtual engine in ctx->engines[] */
> > +     __u16 num_bonds;
> > +
> > +     __u16 master_class;
> > +     __u16 master_instance;
> 
> struct i915_engine_class_instance master; ?

Yup, the oversight struck me when updating the igt.
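
Presumably folding the pair into the existing type, something like (a
sketch of the follow-up, not the posted patch):

        struct i915_context_engines_bond {
                struct i915_user_extension base;

                struct i915_engine_class_instance master;

                __u16 virtual_index; /* index of virtual engine in ctx->engines[] */
                __u16 num_bonds;

                __u64 flags; /* all undefined flags must be zero */
                __u64 mbz64[4]; /* reserved for future use; must be zero */

                struct i915_engine_class_instance engines[0];
        } __attribute__((packed));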
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine
  2019-04-17 13:32           ` Tvrtko Ursulin
@ 2019-04-18  7:24             ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-18  7:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-17 14:32:03)
> 
> On 17/04/2019 13:46, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-04-17 13:35:29)
> >>
> >> On 17/04/2019 12:57, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2019-04-17 12:43:49)
> >>>>
> >>>> On 17/04/2019 08:56, Chris Wilson wrote:
> >>>>> Allow the user to direct which physical engines of the virtual engine
> >>>>> they wish to execute on, as sometimes it is necessary to override the
> >>>>> load balancing algorithm.
> >>>>>
> >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>> ---
> >>>>>     drivers/gpu/drm/i915/gt/intel_lrc.c    |  58 +++++++++++
> >>>>>     drivers/gpu/drm/i915/gt/selftest_lrc.c | 131 +++++++++++++++++++++++++
> >>>>>     drivers/gpu/drm/i915/i915_request.c    |   1 +
> >>>>>     drivers/gpu/drm/i915/i915_request.h    |   3 +
> >>>>>     4 files changed, 193 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>>> index d6efd6aa67cb..560a18bb4cbb 100644
> >>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>>> @@ -552,6 +552,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
> >>>>>         intel_engine_context_out(rq->engine);
> >>>>>         execlists_context_status_change(rq, status);
> >>>>>         trace_i915_request_out(rq);
> >>>>> +
> >>>>> +     /*
> >>>>> +      * If this is part of a virtual engine, its next request may have
> >>>>> +      * been blocked waiting for access to the active context. We have
> >>>>> +      * to kick all the siblings again in case we need to switch (e.g.
> >>>>> +      * the next request is not runnable on this engine). Hopefully,
> >>>>> +      * we will already have submitted the next request before the
> >>>>> +      * tasklet runs and do not need to rebuild each virtual tree
> >>>>> +      * and kick everyone again.
> >>>>> +      */
> >>>>> +     if (rq->engine != rq->hw_context->engine)
> >>>>> +             tasklet_schedule(&rq->hw_context->engine->execlists.tasklet);
> >>>>
> >>>> Is this needed only for non-default execution_mask? If so it would be
> >>>> good to limit it to avoid tasklet storm with plain veng.
> >>>
> >>> The issue is not just with this rq but the next one. If that has a
> >>> restricted mask that prevents it running on this engine, we may have
> >>> missed the opportunity to queue it (and so never run it under just the
> >>> right circumstances).
> >>>
> >>> Something like
> >>>        to_virtual_engine(rq->hw_context->engine)->request->execution_mask & ~rq->execution_mask
> >>>
> >>> The storm isn't quite so bad, it's only on context-out, and we often do
> >>> succeed in keeping it busy. I was just trying to avoid pulling in ve here.
> >>
> >> What do you mean by the "pulling in ve" bit? Avoiding using
> >> to_virtual_engine like in the line you wrote above?
> > 
> > Just laziness hiding behind an excuse of trying not to smear veng too
> > widely.
> > 
> >>>>> +
> >>>>> +     rq = READ_ONCE(ve->request);
> >>>>> +     if (!rq)
> >>>>> +             return 0;
> >>>>> +
> >>>>> +     /* The rq is ready for submission; rq->execution_mask is now stable. */
> >>>>> +     mask = rq->execution_mask;
> >>>>> +     if (unlikely(!mask)) {
> >>>>> +             /* Invalid selection, submit to a random engine in error */
> >>>>> +             i915_request_skip(rq, -ENODEV);
> >>>>
> >>>> When can this happen? It looks like if it can happen we should reject it
> >>>> earlier. Or if it can't then just assert.
> >>>
> >>> Many submit fences can end up with an interesection of 0. This is the
> >>> convenient point to do the rejection, as with any other asynchronous
> >>> error.
> >>
> >> Which ones are many? Why would we have uAPI which allows setting
> >> impossible things where all requests will fail with -ENODEV?
> > 
> > But we are rejecting them in the uAPI, right here. This is the earliest
> > point where all the information for a particular execbuf is available
> > and we have the means of reporting that back.
> 
> In the tasklet? I could just be extra slow today, but could you please
> explain how we allowed a submission which can't be rejected earlier
> than in the tasklet? What sequence of events leads to it?

We cannot know all the paths that lead to modification of
execution_mask.
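
For illustration, with hypothetical mask values: two bonds whose
sibling sets do not overlap leave the request with no legal engine,
and the tasklet is the first point that can observe it:

	rq->execution_mask = 0xf;                   /* four siblings */
	rq->execution_mask &= bond_a->sibling_mask; /* bond A allows 0x3 */
	rq->execution_mask &= bond_b->sibling_mask; /* bond B allows 0xc */
	/* 0x3 & 0xc == 0: nothing legal remains, so the tasklet fails
	 * the request via i915_request_skip(rq, -ENODEV). */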

> >>>>> +             mask = ve->siblings[0]->mask;
> >>>>> +     }
> >>>>> +
> >>>>> +     GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
> >>>>> +               ve->base.name,
> >>>>> +               rq->fence.context, rq->fence.seqno,
> >>>>> +               mask, ve->base.execlists.queue_priority_hint);
> >>>>> +
> >>>>> +     return mask;
> >>>>> +}
> >>>>> +
> >>>>>     static void virtual_submission_tasklet(unsigned long data)
> >>>>>     {
> >>>>>         struct virtual_engine * const ve = (struct virtual_engine *)data;
> >>>>>         const int prio = ve->base.execlists.queue_priority_hint;
> >>>>> +     intel_engine_mask_t mask;
> >>>>>         unsigned int n;
> >>>>>     
> >>>>> +     rcu_read_lock();
> >>>>> +     mask = virtual_submission_mask(ve);
> >>>>> +     rcu_read_unlock();
> >>>>
> >>>> What is the RCU for?
> >>>
> >>> Accessing ve->request. There's nothing stopping another engine from
> >>> spotting the ve->request still in its tree, submitting it and it being
> >>> retired all during the read here.
> >>
> >> AFAIU there can only be one instance of virtual_submission_tasklet per
> >> VE at a time and the code above is before the request is inserted into
> >> physical engine trees. So I don't get it.
> > 
> > But the veng is being utilized by real engines concurrently; they are
> > the ones who take the ve->request and execute it, and so may free the
> > ve->request behind the submission tasklet's back. Later on, the
> > spinlock comes into play, after we have decided there's a request
> > ready.
> 
> How can real engines see this request at this point since it hasn't been 
> put in the queue yet?

The veng is still in the tree on each engine from the last request. Any
physical engine may peek into the future, since on submission we only
remove the node from the local engine's tree, to avoid taking all the
other locks.
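
A sketch of the reader side, matching the virtual_submission_mask()
quoted above (RCU keeps the peeked request from being freed under us):

	rcu_read_lock();
	rq = READ_ONCE(ve->request); /* may be consumed and retired at any time */
	mask = rq ? rq->execution_mask : 0;
	rcu_read_unlock();
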
-Chris

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-18  6:57     ` Chris Wilson
@ 2019-04-18  8:57       ` Tvrtko Ursulin
  2019-04-18  9:13         ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-18  8:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 18/04/2019 07:57, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-18 07:47:51)
>>
>> On 17/04/2019 08:56, Chris Wilson wrote:
>>> +static void
>>> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
>>> +{
>>> +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
>>> +     struct ve_bond *bond;
>>> +
>>> +     bond = virtual_find_bond(ve, to_request(signal)->engine);
>>> +     if (bond) {
>>> +             intel_engine_mask_t old, new, cmp;
>>> +
>>> +             cmp = READ_ONCE(rq->execution_mask);
>>> +             do {
>>> +                     old = cmp;
>>> +                     new = cmp & bond->sibling_mask;
>>> +             } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
>>
>> Loop implies someone else might be modifying the rq->execution_mask in
>> parallel?
> 
> There's nothing that prevents there being multiple bonds being
> executed simultaneously (other than practicality). There's also nothing
> that says this should be the only way to modify rq->execution_mask in
> the future.

But there is only one request, so how can it be submitted multiple
times simultaneously?

>>> +static int
>>> +set_engines__bond(struct i915_user_extension __user *base, void *data)
>>> +{
>>> +     struct i915_context_engines_bond __user *ext =
>>> +             container_of_user(base, typeof(*ext), base);
>>> +     const struct set_engines *set = data;
>>> +     struct intel_engine_cs *virtual;
>>> +     struct intel_engine_cs *master;
>>> +     u16 class, instance;
>>> +     u16 idx, num_bonds;
>>> +     int err, n;
>>> +
>>> +     if (get_user(idx, &ext->virtual_index))
>>> +             return -EFAULT;
>>> +
>>> +     if (idx >= set->engines->num_engines) {
>>> +             DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
>>> +                       idx, set->engines->num_engines);
>>> +             return -EINVAL;
>>> +     }
>>> +
>>> +     idx = array_index_nospec(idx, set->engines->num_engines);
>>> +     if (!set->engines->engines[idx]) {
>>> +             DRM_DEBUG("Invalid engine at %d\n", idx);
>>> +             return -EINVAL;
>>> +     }
>>> +
>>> +     /*
>>> +      * A non-virtual engine has 0 siblings to choose between; and submit
>>> +      * fence will always be directed to the one engine.
>>> +      */
>>> +     virtual = set->engines->engines[idx]->engine;
>>> +     if (!intel_engine_is_virtual(virtual))
>>> +             return 0;
>>
>> Hmm wouldn't we strictly speaking need to distinguish between uAPI
>> errors and auto-magic-single-veng-replacement? Latter is OK to return
>> success, but former should be reported as -EINVAL I think.
> 
> Is it a uAPI error if it works? :)

It works, but what is the practical use? It more likely signals that
userspace got its configuration wrong, and if we silently accept it,
the mistake gets more difficult to figure out.
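
A sketch of the stricter check being argued for here (the debug
message is illustrative):

	virtual = set->engines->engines[idx]->engine;
	if (!intel_engine_is_virtual(virtual)) {
		DRM_DEBUG("Engine at %d is not a virtual engine\n", idx);
		return -EINVAL;
	}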

> 
>>> +
>>> +     err = check_user_mbz(&ext->flags);
>>> +     if (err)
>>> +             return err;
>>> +
>>> +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
>>> +             err = check_user_mbz(&ext->mbz64[n]);
>>> +             if (err)
>>> +                     return err;
>>> +     }
>>> +
>>> +     if (get_user(class, &ext->master_class))
>>> +             return -EFAULT;
>>> +
>>> +     if (get_user(instance, &ext->master_instance))
>>> +             return -EFAULT;
>>> +
>>> +     master = intel_engine_lookup_user(set->ctx->i915, class, instance);
>>> +     if (!master) {
>>> +             DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
>>> +                       class, instance);
>>> +             return -EINVAL;
>>> +     }
>>> +
>>> +     if (get_user(num_bonds, &ext->num_bonds))
>>> +             return -EFAULT;
>>
>> Should num_bonds > virtual->num_siblings be an error?
> 
> They could specify the same bond multiple times for whatever reason (and
> probably should allow skipping NONE?), if the target doesn't exist that's
> definitely an error.

So which bond do we pick if they specify multiple ones? Just the first
one found? Hm, actually I was thinking about making sure each master is
only specified once, not the siblings. For siblings we indeed do not
care.
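
A sketch of that uniqueness check, reusing the existing lookup (error
code illustrative, and assuming ve = to_virtual_engine(virtual)):

	if (virtual_find_bond(ve, master)) {
		DRM_DEBUG("Master engine already has a bond\n");
		return -EEXIST;
	}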

Regards,

Tvrtko

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-18  8:57       ` Tvrtko Ursulin
@ 2019-04-18  9:13         ` Chris Wilson
  2019-04-18  9:50           ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-18  9:13 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-18 09:57:43)
> 
> On 18/04/2019 07:57, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-04-18 07:47:51)
> >>
> >> On 17/04/2019 08:56, Chris Wilson wrote:
> >>> +static void
> >>> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> >>> +{
> >>> +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
> >>> +     struct ve_bond *bond;
> >>> +
> >>> +     bond = virtual_find_bond(ve, to_request(signal)->engine);
> >>> +     if (bond) {
> >>> +             intel_engine_mask_t old, new, cmp;
> >>> +
> >>> +             cmp = READ_ONCE(rq->execution_mask);
> >>> +             do {
> >>> +                     old = cmp;
> >>> +                     new = cmp & bond->sibling_mask;
> >>> +             } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
> >>
> >> Loop implies someone else might be modifying the rq->execution_mask in
> >> parallel?
> > 
> > There's nothing that prevents there being multiple bonds being
> > executed simultaneously (other than practicality). There's also nothing
> > that says this should be the only way to modify rq->execution_mask in
> > the future.
> 
> But there is only one request, so how can it be submitted multiple times simultaneously?

You mean "How can it be signaled multiple times simultaneously?"

> 
> >>> +static int
> >>> +set_engines__bond(struct i915_user_extension __user *base, void *data)
> >>> +{
> >>> +     struct i915_context_engines_bond __user *ext =
> >>> +             container_of_user(base, typeof(*ext), base);
> >>> +     const struct set_engines *set = data;
> >>> +     struct intel_engine_cs *virtual;
> >>> +     struct intel_engine_cs *master;
> >>> +     u16 class, instance;
> >>> +     u16 idx, num_bonds;
> >>> +     int err, n;
> >>> +
> >>> +     if (get_user(idx, &ext->virtual_index))
> >>> +             return -EFAULT;
> >>> +
> >>> +     if (idx >= set->engines->num_engines) {
> >>> +             DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
> >>> +                       idx, set->engines->num_engines);
> >>> +             return -EINVAL;
> >>> +     }
> >>> +
> >>> +     idx = array_index_nospec(idx, set->engines->num_engines);
> >>> +     if (!set->engines->engines[idx]) {
> >>> +             DRM_DEBUG("Invalid engine at %d\n", idx);
> >>> +             return -EINVAL;
> >>> +     }
> >>> +
> >>> +     /*
> >>> +      * A non-virtual engine has 0 siblings to choose between; and submit
> >>> +      * fence will always be directed to the one engine.
> >>> +      */
> >>> +     virtual = set->engines->engines[idx]->engine;
> >>> +     if (!intel_engine_is_virtual(virtual))
> >>> +             return 0;
> >>
> >> Hmm wouldn't we strictly speaking need to distinguish between uAPI
> >> errors and auto-magic-single-veng-replacement? Latter is OK to return
> >> success, but former should be reported as -EINVAL I think.
> > 
> > Is it a uAPI error if it works? :)
> 
> It works, but what is the practical use? It more likely signals that
> userspace got its configuration wrong, and if we silently accept it,
> the mistake gets more difficult to figure out.

At that point, I was being facetious. Memory says it was simpler to just
stick the virtual check at the start than have to insert it later. But
it's trivial to move later, so it's done.

> >>> +
> >>> +     err = check_user_mbz(&ext->flags);
> >>> +     if (err)
> >>> +             return err;
> >>> +
> >>> +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> >>> +             err = check_user_mbz(&ext->mbz64[n]);
> >>> +             if (err)
> >>> +                     return err;
> >>> +     }
> >>> +
> >>> +     if (get_user(class, &ext->master_class))
> >>> +             return -EFAULT;
> >>> +
> >>> +     if (get_user(instance, &ext->master_instance))
> >>> +             return -EFAULT;
> >>> +
> >>> +     master = intel_engine_lookup_user(set->ctx->i915, class, instance);
> >>> +     if (!master) {
> >>> +             DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
> >>> +                       class, instance);
> >>> +             return -EINVAL;
> >>> +     }
> >>> +
> >>> +     if (get_user(num_bonds, &ext->num_bonds))
> >>> +             return -EFAULT;
> >>
> >> Should num_bonds > virtual->num_siblings be an error?
> > 
> > They could specify the same bond multiple times for whatever reason (and
> > probably should allow skipping NONE?), if the target doesn't exist that's
> > definitely an error.
> 
> So which bond do we pick if they specify multiple ones? Just the first
> one found? Hm, actually I was thinking about making sure each master is
> only specified once, not the siblings. For siblings we indeed do not care.

No, it's a mask: if the parent executes on the master, use this set of
children.

I was reasonably happy to use a cumulative mask if master is specified
by more than one bond ext; but maybe it should be an intersection. Hmm.
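
In code terms (masks a and b hypothetical):

	/* Master M named by two bond extensions with sibling masks a, b:
	 *   union (the posted behaviour):   bond->sibling_mask = a | b;
	 *   intersection (the alternative): bond->sibling_mask = a & b;
	 * Union permits any engine that either bond allows; intersection
	 * only those that both allow.
	 */
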
-Chris

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-18  9:13         ` Chris Wilson
@ 2019-04-18  9:50           ` Tvrtko Ursulin
  2019-04-18  9:59             ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-18  9:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 18/04/2019 10:13, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-18 09:57:43)
>>
>> On 18/04/2019 07:57, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-04-18 07:47:51)
>>>>
>>>> On 17/04/2019 08:56, Chris Wilson wrote:
>>>>> +static void
>>>>> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
>>>>> +{
>>>>> +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
>>>>> +     struct ve_bond *bond;
>>>>> +
>>>>> +     bond = virtual_find_bond(ve, to_request(signal)->engine);
>>>>> +     if (bond) {
>>>>> +             intel_engine_mask_t old, new, cmp;
>>>>> +
>>>>> +             cmp = READ_ONCE(rq->execution_mask);
>>>>> +             do {
>>>>> +                     old = cmp;
>>>>> +                     new = cmp & bond->sibling_mask;
>>>>> +             } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
>>>>
>>>> Loop implies someone else might be modifying the rq->execution_mask in
>>>> parallel?
>>>
>>> There's nothing that prevents there being multiple bonds being
>>> executed simultaneously (other than practicality). There's also nothing
>>> that says this should be the only way to modify rq->execution_mask in
>>> the future.
>>
>> But there is only one request, so how can it be submitted multiple times simultaneously?
> 
> You mean "How can it be signaled multiple times simultaneously?"

Okay yes, signaled. You could give the same submit fence to multiple
slaves, but you can't have the same slave request receive notifications
from multiple masters.

Or you can, if you build a composite fence and pass that in? Is this
the story about signal-on-any vs signal-on-all?
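
For reference, the existing kernel primitive for such a composite is
dma_fence_array; a minimal signal-on-all sketch (master request names
hypothetical, error handling elided):

	struct dma_fence **fences;  /* kmalloc'd: the array takes ownership */
	struct dma_fence_array *composite;

	fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
	fences[0] = dma_fence_get(&master_a_rq->fence);
	fences[1] = dma_fence_get(&master_b_rq->fence);
	composite = dma_fence_array_create(2, fences,
					   dma_fence_context_alloc(1), 1,
					   false); /* false: signal-on-all */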

>>
>>>>> +static int
>>>>> +set_engines__bond(struct i915_user_extension __user *base, void *data)
>>>>> +{
>>>>> +     struct i915_context_engines_bond __user *ext =
>>>>> +             container_of_user(base, typeof(*ext), base);
>>>>> +     const struct set_engines *set = data;
>>>>> +     struct intel_engine_cs *virtual;
>>>>> +     struct intel_engine_cs *master;
>>>>> +     u16 class, instance;
>>>>> +     u16 idx, num_bonds;
>>>>> +     int err, n;
>>>>> +
>>>>> +     if (get_user(idx, &ext->virtual_index))
>>>>> +             return -EFAULT;
>>>>> +
>>>>> +     if (idx >= set->engines->num_engines) {
>>>>> +             DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
>>>>> +                       idx, set->engines->num_engines);
>>>>> +             return -EINVAL;
>>>>> +     }
>>>>> +
>>>>> +     idx = array_index_nospec(idx, set->engines->num_engines);
>>>>> +     if (!set->engines->engines[idx]) {
>>>>> +             DRM_DEBUG("Invalid engine at %d\n", idx);
>>>>> +             return -EINVAL;
>>>>> +     }
>>>>> +
>>>>> +     /*
>>>>> +      * A non-virtual engine has 0 siblings to choose between; and submit
>>>>> +      * fence will always be directed to the one engine.
>>>>> +      */
>>>>> +     virtual = set->engines->engines[idx]->engine;
>>>>> +     if (!intel_engine_is_virtual(virtual))
>>>>> +             return 0;
>>>>
>>>> Hmm wouldn't we strictly speaking need to distinguish between uAPI
>>>> errors and auto-magic-single-veng-replacement? Latter is OK to return
>>>> success, but former should be reported as -EINVAL I think.
>>>
>>> Is it a uAPI error if it works? :)
>>
>> It works, but what is the practical use? It more likely signals that
>> userspace got its configuration wrong, and if we silently accept it,
>> the mistake gets more difficult to figure out.
> 
> At that point, I was being facetious. Memory says it was simpler to just
> stick the virtual check at the start than have to insert it later. But
> it's trivial to move later, so it's done.
> 
>>>>> +
>>>>> +     err = check_user_mbz(&ext->flags);
>>>>> +     if (err)
>>>>> +             return err;
>>>>> +
>>>>> +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
>>>>> +             err = check_user_mbz(&ext->mbz64[n]);
>>>>> +             if (err)
>>>>> +                     return err;
>>>>> +     }
>>>>> +
>>>>> +     if (get_user(class, &ext->master_class))
>>>>> +             return -EFAULT;
>>>>> +
>>>>> +     if (get_user(instance, &ext->master_instance))
>>>>> +             return -EFAULT;
>>>>> +
>>>>> +     master = intel_engine_lookup_user(set->ctx->i915, class, instance);
>>>>> +     if (!master) {
>>>>> +             DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
>>>>> +                       class, instance);
>>>>> +             return -EINVAL;
>>>>> +     }
>>>>> +
>>>>> +     if (get_user(num_bonds, &ext->num_bonds))
>>>>> +             return -EFAULT;
>>>>
>>>> Should num_bonds > virtual->num_siblings be an error?
>>>
>>> They could specify the same bond multiple times for whatever reason (and
>>> probably should allow skipping NONE?), if the target doesn't exist that's
>>> definitely an error.
>>
>> So which bond do we pick if they specify multiple ones? Just the first
>> one found? Hm, actually I was thinking about making sure each master is
>> only specified once, not the siblings. For siblings we indeed do not care.
> 
> No, it's a mask: if the parent executes on the master, use this set of
> children.
> 
> I was reasonably happy to use a cumulative mask if master is specified
> by more than one bond ext; but maybe it should be an intersection. Hmm.

Do you see a realistic use case that makes sense for specifying the
same master in multiple bonds? If not, I'd just disallow it, and then
we don't have the question of union vs intersection policy.

Regards,

Tvrtko

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-18  9:50           ` Tvrtko Ursulin
@ 2019-04-18  9:59             ` Chris Wilson
  2019-04-18 10:24               ` Tvrtko Ursulin
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-18  9:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-18 10:50:30)
> 
> On 18/04/2019 10:13, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-04-18 09:57:43)
> >>
> >> On 18/04/2019 07:57, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2019-04-18 07:47:51)
> >>>>
> >>>> On 17/04/2019 08:56, Chris Wilson wrote:
> >>>>> +static void
> >>>>> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> >>>>> +{
> >>>>> +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
> >>>>> +     struct ve_bond *bond;
> >>>>> +
> >>>>> +     bond = virtual_find_bond(ve, to_request(signal)->engine);
> >>>>> +     if (bond) {
> >>>>> +             intel_engine_mask_t old, new, cmp;
> >>>>> +
> >>>>> +             cmp = READ_ONCE(rq->execution_mask);
> >>>>> +             do {
> >>>>> +                     old = cmp;
> >>>>> +                     new = cmp & bond->sibling_mask;
> >>>>> +             } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
> >>>>
> >>>> Loop implies someone else might be modifying the rq->execution_mask in
> >>>> parallel?
> >>>
> >>> There's nothing that prevents there being multiple bonds being
> >>> executed simultaneously (other than practicality). There's also nothing
> >>> that says this should be the only way to modify rq->execution_mask in
> >>> the future.
> >>
> >> But there is only one request, so how can it be submitted multiple times simultaneously?
> > 
> > You mean "How can it be signaled multiple times simultaneously?"
> 
> Okay yes, signaled. You could give the same submit fence to multiple
> slaves, but you can't have the same slave request receive notifications
> from multiple masters.
> 
> Or you can, if you build a composite fence and pass that in? Is this
> the story about signal-on-any vs signal-on-all?

There's nothing inherent in the design to prevent virtual_bond_execute
being called multiple times given multiple fences along one or more
engines.

There's a practical limitation in the proposed uAPI to a single
submit-fence, but that may indeed be a composite fence. There's also
the question of whether to squeeze in syncobj support.
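
As a sketch of the lost update the cmpxchg loop guards against (mask
values hypothetical):

	/*
	 * CPU0 (bond A, mask 0x3)        CPU1 (bond B, mask 0x5)
	 * old = execution_mask (0xf)
	 *                                old = execution_mask (0xf)
	 *                                execution_mask = 0xf & 0x5 (0x5)
	 * execution_mask = 0xf & 0x3 (0x3)   <-- bond B's AND is lost
	 *
	 * With cmpxchg, CPU0's exchange fails (the mask is no longer
	 * 0xf), the loop retries against 0x5, and the final mask is
	 * 0xf & 0x3 & 0x5 == 0x1 regardless of ordering.
	 */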

> >>>>> +
> >>>>> +     err = check_user_mbz(&ext->flags);
> >>>>> +     if (err)
> >>>>> +             return err;
> >>>>> +
> >>>>> +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
> >>>>> +             err = check_user_mbz(&ext->mbz64[n]);
> >>>>> +             if (err)
> >>>>> +                     return err;
> >>>>> +     }
> >>>>> +
> >>>>> +     if (get_user(class, &ext->master_class))
> >>>>> +             return -EFAULT;
> >>>>> +
> >>>>> +     if (get_user(instance, &ext->master_instance))
> >>>>> +             return -EFAULT;
> >>>>> +
> >>>>> +     master = intel_engine_lookup_user(set->ctx->i915, class, instance);
> >>>>> +     if (!master) {
> >>>>> +             DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
> >>>>> +                       class, instance);
> >>>>> +             return -EINVAL;
> >>>>> +     }
> >>>>> +
> >>>>> +     if (get_user(num_bonds, &ext->num_bonds))
> >>>>> +             return -EFAULT;
> >>>>
> >>>> Should num_bonds > virtual->num_siblings be an error?
> >>>
> >>> They could specify the same bond multiple times for whatever reason (and
> >>> probably should allow skipping NONE?), if the target doesn't exist that's
> >>> definitely an error.
> >>
> >> So which bond do we pick if they specify multiple ones? Just the first
> >> one found? Hm, actually I was thinking about making sure each master is
> >> only specified once, not the siblings. For siblings we indeed do not care.
> > 
> > No, it's a mask: if the parent executes on the master, use this set
> > of children.
> > 
> > I was reasonably happy to use a cumulative mask if master is specified
> > by more than one bond ext; but maybe it should be an intersection. Hmm.
> 
> Do you see a realistic use case that makes sense for specifying the
> same master in multiple bonds? If not, I'd just disallow it, and then
> we don't have the question of union vs intersection policy.

Rather the opposite: I don't see that it breaks anything, nor need it
be ill-defined, hence no reason to reject it as a means to protect
oneself.
-Chris

* Re: [PATCH 31/32] drm/i915/execlists: Virtual engine bonding
  2019-04-18  9:59             ` Chris Wilson
@ 2019-04-18 10:24               ` Tvrtko Ursulin
  0 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-18 10:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 18/04/2019 10:59, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-04-18 10:50:30)
>>
>> On 18/04/2019 10:13, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-04-18 09:57:43)
>>>>
>>>> On 18/04/2019 07:57, Chris Wilson wrote:
>>>>> Quoting Tvrtko Ursulin (2019-04-18 07:47:51)
>>>>>>
>>>>>> On 17/04/2019 08:56, Chris Wilson wrote:
>>>>>>> +static void
>>>>>>> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
>>>>>>> +{
>>>>>>> +     struct virtual_engine *ve = to_virtual_engine(rq->engine);
>>>>>>> +     struct ve_bond *bond;
>>>>>>> +
>>>>>>> +     bond = virtual_find_bond(ve, to_request(signal)->engine);
>>>>>>> +     if (bond) {
>>>>>>> +             intel_engine_mask_t old, new, cmp;
>>>>>>> +
>>>>>>> +             cmp = READ_ONCE(rq->execution_mask);
>>>>>>> +             do {
>>>>>>> +                     old = cmp;
>>>>>>> +                     new = cmp & bond->sibling_mask;
>>>>>>> +             } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
>>>>>>
>>>>>> Loop implies someone else might be modifying the rq->execution_mask in
>>>>>> parallel?
>>>>>
>>>>> There's nothing that prevents there being multiple bonds being
>>>>> executed simultaneously (other than practicality). There's also nothing
>>>>> that says this should be the only way to modify rq->execution_mask in
>>>>> the future.
>>>>
>>>> But there is only one request, so how can it be submitted multiple times simultaneously?
>>>
>>> You mean "How can it be signaled multiple times simultaneously?"
>>
>> Okay yes, signaled. You could give the same submit fence to multiple
>> slaves, but you can't have the same slave request receive notifications
>> from multiple masters.
>>
>> Or you can, if you build a composite fence and pass that in? Is this
>> the story about signal-on-any vs signal-on-all?
> 
> There's nothing inherent in the design to prevent virtual_bond_execute
> being called multiple times given multiple fences along one or more
> engines.
> 
> There's a practical limitation in the proposed uAPI to a single
> submit-fence, but that may indeed be a composite fence. There's also
> the question of whether to squeeze in syncobj support.

Ok. Just drop in a comment with the loop please.

>>>>>>> +
>>>>>>> +     err = check_user_mbz(&ext->flags);
>>>>>>> +     if (err)
>>>>>>> +             return err;
>>>>>>> +
>>>>>>> +     for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
>>>>>>> +             err = check_user_mbz(&ext->mbz64[n]);
>>>>>>> +             if (err)
>>>>>>> +                     return err;
>>>>>>> +     }
>>>>>>> +
>>>>>>> +     if (get_user(class, &ext->master_class))
>>>>>>> +             return -EFAULT;
>>>>>>> +
>>>>>>> +     if (get_user(instance, &ext->master_instance))
>>>>>>> +             return -EFAULT;
>>>>>>> +
>>>>>>> +     master = intel_engine_lookup_user(set->ctx->i915, class, instance);
>>>>>>> +     if (!master) {
>>>>>>> +             DRM_DEBUG("Unrecognised master engine: { class:%d, instance:%d }\n",
>>>>>>> +                       class, instance);
>>>>>>> +             return -EINVAL;
>>>>>>> +     }
>>>>>>> +
>>>>>>> +     if (get_user(num_bonds, &ext->num_bonds))
>>>>>>> +             return -EFAULT;
>>>>>>
>>>>>> Should num_bonds > virtual->num_siblings be an error?
>>>>>
>>>>> They could specify the same bond multiple times for whatever reason (and
>>>>> probably should allow skipping NONE?), if the target doesn't exist that's
>>>>> definitely an error.
>>>>
>>>> So which bond do we pick if they specify multiple ones? Just the first
>>>> one found? Hm, actually I was thinking about making sure each master is
>>>> only specified once, not the siblings. For siblings we indeed do not care.
>>>
>>> No, it's a mask: if the parent executes on the master, use this set
>>> of children.
>>>
>>> I was reasonably happy to use a cumulative mask if master is specified
>>> by more than one bond ext; but maybe it should be an intersection. Hmm.
>>
>> Do you see a realistic use case that makes sense for specifying the
>> same master in multiple bonds? If not, I'd just disallow it, and then
>> we don't have the question of union vs intersection policy.
> 
> Rather the opposite: I don't see that it breaks anything, nor need it
> be ill-defined, hence no reason to reject it as a means to protect
> oneself.

I don't mean it's dangerous or poorly defined. In that sense your
current implementation of using a union is fine. (Earlier I forgot that
you skip creating multiple bond objects in this case.)

Regards,

Tvrtko

* Re: [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (32 preceding siblings ...)
  2019-04-17 11:33 ` ✗ Fi.CI.BAT: failure for series starting with [01/32] " Patchwork
@ 2019-04-18 10:32 ` Tvrtko Ursulin
  2019-04-18 10:40   ` Chris Wilson
  2019-04-23 12:59 ` Tvrtko Ursulin
  34 siblings, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-18 10:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Currently there is an underlying assumption that i915_request_unsubmit()
> is synchronous wrt the GPU -- that is the request is no longer in flight
> as we remove it. In the near future that may change, and this may upset
> our signaling as we can process an interrupt for that request while it
> is no longer in flight.

Is this the preempt-to-busy future?

> 
> CPU0					CPU1
> intel_engine_breadcrumbs_irq
> (queue request completion)
> 					i915_request_cancel_signaling
> ...					...
> 					i915_request_enable_signaling
> dma_fence_signal

In this case the completed request is unsubmitted?

Will there be some inherent problem preventing us from avoiding
preemption of completed requests? The window between MI_ARB_ENABLE in
emit_fini_breadcrumb and context complete?

Regards,

Tvrtko

> 
> Hence in the time it took us to drop the lock to signal the request, a
> preemption event may have occurred and re-queued the request. In the
> process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
> so reused the rq->signal_link that was in use on CPU0, leading to bad
> pointer chasing in intel_engine_breadcrumbs_irq.
> 
> A related issue was that if someone started listening for a signal on a
> completed but no longer in-flight request, we missed the opportunity to
> immediately signal that request.
> 
> Furthermore, as intel_contexts may be immediately released during
> request retirement, in order to be entirely sure that
> intel_engine_breadcrumbs_irq may no longer dereference the intel_context
> (ce->signals and ce->signal_link), we must wait for irq spinlock.
> 
> In order to prevent the race, we use a bit in the fence.flags to signal
> the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
> For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
> quickly signals to any outside observer that the fence is indeed signaled.
> 
> Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/dma-buf/dma-fence.c              |  1 +
>   drivers/gpu/drm/i915/i915_request.c      |  1 +
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 52 ++++++++++++++----------
>   3 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 3aa8733f832a..9bf06042619a 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -29,6 +29,7 @@
>   
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>   
>   static DEFINE_SPINLOCK(dma_fence_stub_lock);
>   static struct dma_fence dma_fence_stub;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index b836721d3b13..e0efc334463b 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -432,6 +432,7 @@ void __i915_request_submit(struct i915_request *request)
>   	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
>   
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> +	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
>   	    !i915_request_enable_breadcrumb(request))
>   		intel_engine_queue_breadcrumbs(engine);
>   
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 3cbffd400b1b..e19f84b006cc 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/kthread.h>
> +#include <trace/events/dma_fence.h>
>   #include <uapi/linux/sched/types.h>
>   
>   #include "i915_drv.h"
> @@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   {
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	const ktime_t timestamp = ktime_get();
>   	struct intel_context *ce, *cn;
>   	struct list_head *pos, *next;
>   	LIST_HEAD(signal);
> @@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   
>   			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
>   					     &rq->fence.flags));
> +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +
> +			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +					     &rq->fence.flags))
> +				continue;
>   
>   			/*
>   			 * Queue for execution after dropping the signaling
> @@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   			 * more signalers to the same context or engine.
>   			 */
>   			i915_request_get(rq);
> -
> -			/*
> -			 * We may race with direct invocation of
> -			 * dma_fence_signal(), e.g. i915_request_retire(),
> -			 * so we need to acquire our reference to the request
> -			 * before we cancel the breadcrumb.
> -			 */
> -			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   			list_add_tail(&rq->signal_link, &signal);
>   		}
>   
> @@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   	list_for_each_safe(pos, next, &signal) {
>   		struct i915_request *rq =
>   			list_entry(pos, typeof(*rq), signal_link);
> +		struct dma_fence_cb *cur, *tmp;
> +
> +		trace_dma_fence_signaled(&rq->fence);
> +
> +		rq->fence.timestamp = timestamp;
> +		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
> +
> +		spin_lock(&rq->lock);
> +		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
> +			INIT_LIST_HEAD(&cur->node);
> +			cur->func(&rq->fence, cur);
> +		}
> +		INIT_LIST_HEAD(&rq->fence.cb_list);
> +		spin_unlock(&rq->lock);
>   
> -		dma_fence_signal(&rq->fence);
>   		i915_request_put(rq);
>   	}
>   }
> @@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>   
>   bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   {
> -	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
> -
> -	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> -
> -	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> -		return true;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
> -	spin_lock(&b->irq_lock);
> -	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
> -	    !__request_completed(rq)) {
> +	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
> +		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   		struct intel_context *ce = rq->hw_context;
>   		struct list_head *pos;
>   
> +		spin_lock(&b->irq_lock);
> +		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> +
>   		__intel_breadcrumbs_arm_irq(b);
>   
>   		/*
> @@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   			list_move_tail(&ce->signal_link, &b->signalers);
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +		spin_unlock(&b->irq_lock);
>   	}
> -	spin_unlock(&b->irq_lock);
>   
>   	return !__request_completed(rq);
>   }
> @@ -294,8 +304,8 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>   {
>   	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   
> -	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> -		return;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
>   	spin_lock(&b->irq_lock);
>   	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
> 

* Re: [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-18 10:32 ` [PATCH 01/32] " Tvrtko Ursulin
@ 2019-04-18 10:40   ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-18 10:40 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-18 11:32:31)
> 
> On 17/04/2019 08:56, Chris Wilson wrote:
> > Currently there is an underlying assumption that i915_request_unsubmit()
> > is synchronous wrt the GPU -- that is the request is no longer in flight
> > as we remove it. In the near future that may change, and this may upset
> > our signaling as we can process an interrupt for that request while it
> > is no longer in flight.
> 
> Is this the preempt-to-busy future?

Async unsubmit is preempt-to-busy, but one shouldn't rule out the guc
either :)

> 
> > 
> > CPU0                                  CPU1
> > intel_engine_breadcrumbs_irq
> > (queue request completion)
> >                                       i915_request_cancel_signaling
> > ...                                   ...
> >                                       i915_request_enable_signaling
> > dma_fence_signal
> 
> In this case the completed request is unsubmitted?

The request completed after it was unsubmitted, yes.

> Will there be some inherent problem preventing us from avoiding
> preemption of completed requests? The window between MI_ARB_ENABLE in
> emit_fini_breadcrumb and context complete?

For preempt-to-busy, we stick a semaphore into the fini_breadcrumb to
prevent context completion while we perform the preemption.
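
As a sketch of that idea, using the gen8 mnemonics from
intel_gpu_commands.h (the emitter shown is hypothetical; the exact
form belongs to the later preempt-to-busy series):

	/* Spin after the breadcrumb store until the scheduler releases
	 * us, so a preemption decision can never race with context
	 * completion. */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0; /* released when the preempt slot is cleared */
	*cs++ = hws_preempt_address(rq->engine); /* hypothetical helper */
	*cs++ = 0;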

Please note that the second aspect, wrt dereferencing the
intel_context after we skip the cancel_breadcrumbs during retire, is
why I think this fixes a present bug.
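
A sketch of that present bug, as described (timeline illustrative):

	/*
	 * CPU0 (breadcrumbs irq)          CPU1 (retire)
	 * intel_engine_breadcrumbs_irq
	 *   walk ce->signals ...          i915_request_retire
	 *                                   (skips cancel_breadcrumbs)
	 *                                   intel_context_put -> free(ce)
	 *   deref ce->signal_link    <-- use-after-free
	 *
	 * Serialising on b->irq_lock before dropping the last reference
	 * to the intel_context closes the window.
	 */
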
-Chris

* Re: [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/
  2019-04-17  7:56 ` [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/ Chris Wilson
  2019-04-17  9:42   ` Tvrtko Ursulin
@ 2019-04-18 12:04   ` Joonas Lahtinen
  2019-04-23  8:57     ` Joonas Lahtinen
  1 sibling, 1 reply; 68+ messages in thread
From: Joonas Lahtinen @ 2019-04-18 12:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Jani Nikula, Rodrigo Vivi

+ Jani and Rodrigo to comment

I'm definitely all for doing this, so it's only a matter of timing.

The question is, do we want to do it right now, after the last
drm-intel-next was tagged, or do we want to wait a couple of release
candidates?

I'm leaning towards doing this ASAP, as git cherry-pick should
understand that they're just renames, so there should be no issue with
doing the -fixes.

Regards, Joonas

Quoting Chris Wilson (2019-04-17 10:56:32)
> Start partitioning off the code that talks to the hardware (GT) from the
> uapi layers and move the device facing code under gt/
> 
> One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to
> subdivide that header and body further (and split out the submission
> code from the ringbuffer and logical context handling). This patch aims
> to be simple motion so git can fixup inflight patches with little mess.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/Makefile                 | 46 ++++++++++++-------
>  drivers/gpu/drm/i915/Makefile.header-test     |  6 +--
>  drivers/gpu/drm/i915/gt/Makefile              |  2 +
>  drivers/gpu/drm/i915/gt/Makefile.header-test  | 16 +++++++
>  .../gpu/drm/i915/{ => gt}/intel_breadcrumbs.c |  0
>  drivers/gpu/drm/i915/{ => gt}/intel_context.c |  3 +-
>  drivers/gpu/drm/i915/{ => gt}/intel_context.h |  0
>  .../drm/i915/{ => gt}/intel_context_types.h   |  0
>  .../{intel_ringbuffer.h => gt/intel_engine.h} |  0
>  .../gpu/drm/i915/{ => gt}/intel_engine_cs.c   |  8 ++--
>  .../drm/i915/{ => gt}/intel_engine_types.h    |  5 +-
>  .../drm/i915/{ => gt}/intel_gpu_commands.h    |  0
>  .../gpu/drm/i915/{ => gt}/intel_hangcheck.c   |  4 +-
>  drivers/gpu/drm/i915/{ => gt}/intel_lrc.c     |  5 +-
>  drivers/gpu/drm/i915/{ => gt}/intel_lrc.h     |  4 +-
>  drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h |  0
>  drivers/gpu/drm/i915/{ => gt}/intel_mocs.c    |  4 +-
>  drivers/gpu/drm/i915/{ => gt}/intel_mocs.h    |  4 +-
>  .../i915/{i915_reset.c => gt/intel_reset.c}   |  2 +-
>  .../i915/{i915_reset.h => gt/intel_reset.h}   |  2 +-
>  .../gpu/drm/i915/{ => gt}/intel_ringbuffer.c  |  3 +-
>  drivers/gpu/drm/i915/{ => gt}/intel_sseu.c    |  0
>  drivers/gpu/drm/i915/{ => gt}/intel_sseu.h    |  0
>  .../gpu/drm/i915/{ => gt}/intel_workarounds.c |  2 +-
>  .../gpu/drm/i915/{ => gt}/intel_workarounds.h |  8 +++-
>  .../i915/{ => gt}/intel_workarounds_types.h   |  0
>  .../drm/i915/{selftests => gt}/mock_engine.c  | 10 ++--
>  .../drm/i915/{selftests => gt}/mock_engine.h  |  2 +-
>  .../selftest_engine_cs.c}                     |  0
>  .../selftest_hangcheck.c}                     | 16 +++----
>  .../intel_lrc.c => gt/selftest_lrc.c}         | 16 +++----
>  .../selftest_workarounds.c}                   | 18 ++++----
>  drivers/gpu/drm/i915/i915_cmd_parser.c        |  3 +-
>  drivers/gpu/drm/i915/i915_debugfs.c           |  3 +-
>  drivers/gpu/drm/i915/i915_drv.c               |  5 +-
>  drivers/gpu/drm/i915/i915_drv.h               |  7 +--
>  drivers/gpu/drm/i915/i915_gem.c               |  7 +--
>  drivers/gpu/drm/i915/i915_gem_context.c       |  7 ++-
>  drivers/gpu/drm/i915/i915_gem_context.h       |  3 +-
>  drivers/gpu/drm/i915/i915_gem_context_types.h |  3 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 -
>  drivers/gpu/drm/i915/i915_gem_gtt.h           |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.h         |  3 +-
>  drivers/gpu/drm/i915/i915_perf.c              |  3 +-
>  drivers/gpu/drm/i915/i915_pmu.c               |  4 +-
>  drivers/gpu/drm/i915/i915_request.c           |  1 -
>  drivers/gpu/drm/i915/i915_scheduler_types.h   |  2 +-
>  drivers/gpu/drm/i915/i915_trace.h             |  3 +-
>  drivers/gpu/drm/i915/i915_vma.c               |  3 +-
>  drivers/gpu/drm/i915/intel_device_info.h      |  6 ++-
>  drivers/gpu/drm/i915/intel_display.c          |  1 -
>  drivers/gpu/drm/i915/intel_guc_submission.c   |  3 +-
>  drivers/gpu/drm/i915/intel_guc_submission.h   |  3 +-
>  drivers/gpu/drm/i915/intel_uc.c               |  2 +-
>  .../gpu/drm/i915/selftests/i915_gem_context.c |  5 +-
>  drivers/gpu/drm/i915/selftests/igt_reset.c    |  3 +-
>  drivers/gpu/drm/i915/selftests/igt_spinner.h  |  3 +-
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
>  drivers/gpu/drm/i915/selftests/mock_request.c |  3 +-
>  59 files changed, 166 insertions(+), 112 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/Makefile
>  create mode 100644 drivers/gpu/drm/i915/gt/Makefile.header-test
>  rename drivers/gpu/drm/i915/{ => gt}/intel_breadcrumbs.c (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_context.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_context.h (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_context_types.h (100%)
>  rename drivers/gpu/drm/i915/{intel_ringbuffer.h => gt/intel_engine.h} (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_engine_cs.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_engine_types.h (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_gpu_commands.h (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_hangcheck.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.h (98%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.h (97%)
>  rename drivers/gpu/drm/i915/{i915_reset.c => gt/intel_reset.c} (99%)
>  rename drivers/gpu/drm/i915/{i915_reset.h => gt/intel_reset.h} (98%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_ringbuffer.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.c (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.h (100%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.c (99%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.h (88%)
>  rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds_types.h (100%)
>  rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.c (97%)
>  rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.h (98%)
>  rename drivers/gpu/drm/i915/{selftests/intel_engine_cs.c => gt/selftest_engine_cs.c} (100%)
>  rename drivers/gpu/drm/i915/{selftests/intel_hangcheck.c => gt/selftest_hangcheck.c} (99%)
>  rename drivers/gpu/drm/i915/{selftests/intel_lrc.c => gt/selftest_lrc.c} (99%)
>  rename drivers/gpu/drm/i915/{selftests/intel_workarounds.c => gt/selftest_workarounds.c} (98%)
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 53ff209b91bb..40130cf5c003 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,32 +35,53 @@ subdir-ccflags-y += \
>  # Extra header tests
>  include $(src)/Makefile.header-test
>  
> +subdir-ccflags-y += -I$(src)
> +
>  # Please keep these build lists sorted!
>  
>  # core driver code
>  i915-y += i915_drv.o \
>           i915_irq.o \
> -         i915_memcpy.o \
> -         i915_mm.o \
>           i915_params.o \
>           i915_pci.o \
> -         i915_reset.o \
>           i915_suspend.o \
> -         i915_sw_fence.o \
> -         i915_syncmap.o \
>           i915_sysfs.o \
> -         i915_user_extensions.o \
>           intel_csr.o \
>           intel_device_info.o \
>           intel_pm.o \
>           intel_runtime_pm.o \
> -         intel_workarounds.o
> +         intel_uncore.o
> +
> +# core library code
> +i915-y += \
> +       i915_memcpy.o \
> +       i915_mm.o \
> +       i915_sw_fence.o \
> +       i915_syncmap.o \
> +       i915_user_extensions.o
>  
>  i915-$(CONFIG_COMPAT)   += i915_ioc32.o
>  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
>  i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>  
> -# GEM code
> +# "Graphics Technology" (aka we talk to the gpu)
> +obj-y += gt/
> +gt-y += \
> +       gt/intel_breadcrumbs.o \
> +       gt/intel_context.o \
> +       gt/intel_engine_cs.o \
> +       gt/intel_hangcheck.o \
> +       gt/intel_lrc.o \
> +       gt/intel_reset.o \
> +       gt/intel_ringbuffer.o \
> +       gt/intel_mocs.o \
> +       gt/intel_sseu.o \
> +       gt/intel_workarounds.o
> +gt-$(CONFIG_DRM_I915_SELFTEST) += \
> +       gt/mock_engine.o
> +i915-y += $(gt-y)
> +
> +# GEM (Graphics Execution Management) code
>  i915-y += \
>           i915_active.o \
>           i915_cmd_parser.o \
> @@ -88,15 +109,6 @@ i915-y += \
>           i915_timeline.o \
>           i915_trace_points.o \
>           i915_vma.o \
> -         intel_breadcrumbs.o \
> -         intel_context.o \
> -         intel_engine_cs.o \
> -         intel_hangcheck.o \
> -         intel_lrc.o \
> -         intel_mocs.o \
> -         intel_ringbuffer.o \
> -         intel_sseu.o \
> -         intel_uncore.o \
>           intel_wopcm.o
>  
>  # general-purpose microcontroller (GuC) support
> diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
> index 5bcc78d7ac96..96a5d90629ec 100644
> --- a/drivers/gpu/drm/i915/Makefile.header-test
> +++ b/drivers/gpu/drm/i915/Makefile.header-test
> @@ -13,13 +13,11 @@ header_test := \
>         intel_cdclk.h \
>         intel_color.h \
>         intel_connector.h \
> -       intel_context_types.h \
>         intel_crt.h \
>         intel_csr.h \
>         intel_ddi.h \
>         intel_dp.h \
>         intel_dvo.h \
> -       intel_engine_types.h \
>         intel_fbc.h \
>         intel_fbdev.h \
>         intel_frontbuffer.h \
> @@ -33,9 +31,7 @@ header_test := \
>         intel_psr.h \
>         intel_sdvo.h \
>         intel_sprite.h \
> -       intel_sseu.h \
> -       intel_tv.h \
> -       intel_workarounds_types.h
> +       intel_tv.h
>  
>  quiet_cmd_header_test = HDRTEST $@
>        cmd_header_test = echo "\#include \"$(<F)\"" > $@
> diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
> new file mode 100644
> index 000000000000..1c75b5c9790c
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/Makefile
> @@ -0,0 +1,2 @@
> +# Extra header tests
> +include $(src)/Makefile.header-test
> diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test
> new file mode 100644
> index 000000000000..61e06cbb4b32
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/Makefile.header-test
> @@ -0,0 +1,16 @@
> +# SPDX-License-Identifier: MIT
> +# Copyright © 2019 Intel Corporation
> +
> +# Test the headers are compilable as standalone units
> +header_test := $(notdir $(wildcard $(src)/*.h))
> +
> +quiet_cmd_header_test = HDRTEST $@
> +      cmd_header_test = echo "\#include \"$(<F)\"" > $@
> +
> +header_test_%.c: %.h
> +       $(call cmd,header_test)
> +
> +extra-$(CONFIG_DRM_I915_WERROR) += \
> +       $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
> +
> +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_breadcrumbs.c
> rename to drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_context.c
> rename to drivers/gpu/drm/i915/gt/intel_context.c
> index 961d1445833d..ebd1e5919a4a 100644
> --- a/drivers/gpu/drm/i915/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -7,8 +7,9 @@
>  #include "i915_drv.h"
>  #include "i915_gem_context.h"
>  #include "i915_globals.h"
> +
>  #include "intel_context.h"
> -#include "intel_ringbuffer.h"
> +#include "intel_engine.h"
>  
>  static struct i915_global_context {
>         struct i915_global base;
> diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_context.h
> rename to drivers/gpu/drm/i915/gt/intel_context.h
> diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_context_types.h
> rename to drivers/gpu/drm/i915/gt/intel_context_types.h
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_ringbuffer.h
> rename to drivers/gpu/drm/i915/gt/intel_engine.h
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_engine_cs.c
> rename to drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index ad2a683d97f7..21dd3f25e641 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -25,9 +25,10 @@
>  #include <drm/drm_print.h>
>  
>  #include "i915_drv.h"
> -#include "i915_reset.h"
> -#include "intel_ringbuffer.h"
> +
> +#include "intel_engine.h"
>  #include "intel_lrc.h"
> +#include "intel_reset.h"
>  
>  /* Haswell does have the CXT_SIZE register however it does not appear to be
>   * valid. Now, docs explain in dwords what is in the context object. The full
> @@ -1756,6 +1757,5 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/mock_engine.c"
> -#include "selftests/intel_engine_cs.c"
> +#include "selftest_engine_cs.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_engine_types.h
> rename to drivers/gpu/drm/i915/gt/intel_engine_types.h
> index d07a01b3ed0b..3adf58da6d2c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -14,15 +14,14 @@
>  #include <linux/types.h>
>  
>  #include "i915_gem.h"
> +#include "i915_gem_batch_pool.h"
> +#include "i915_pmu.h"
>  #include "i915_priolist_types.h"
>  #include "i915_selftest.h"
>  #include "i915_timeline_types.h"
>  #include "intel_sseu.h"
>  #include "intel_workarounds_types.h"
>  
> -#include "i915_gem_batch_pool.h"
> -#include "i915_pmu.h"
> -
>  #define I915_MAX_SLICES        3
>  #define I915_MAX_SUBSLICES 8
>  
> diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_gpu_commands.h
> rename to drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_hangcheck.c
> rename to drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index 3d51ed1428d4..3053a706a561 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -22,8 +22,8 @@
>   *
>   */
>  
> +#include "intel_reset.h"
>  #include "i915_drv.h"
> -#include "i915_reset.h"
>  
>  struct hangcheck {
>         u64 acthd;
> @@ -330,5 +330,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915)
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/intel_hangcheck.c"
> +#include "selftest_hangcheck.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_lrc.c
> rename to drivers/gpu/drm/i915/gt/intel_lrc.c
> index 18a9dc6ca877..5cadf8f6a23d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -133,13 +133,12 @@
>   */
>  #include <linux/interrupt.h>
>  
> -#include <drm/i915_drm.h>
>  #include "i915_drv.h"
>  #include "i915_gem_render_state.h"
> -#include "i915_reset.h"
>  #include "i915_vgpu.h"
>  #include "intel_lrc_reg.h"
>  #include "intel_mocs.h"
> +#include "intel_reset.h"
>  #include "intel_workarounds.h"
>  
>  #define RING_EXECLIST_QFULL            (1 << 0x2)
> @@ -2905,5 +2904,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/intel_lrc.c"
> +#include "selftest_lrc.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> similarity index 98%
> rename from drivers/gpu/drm/i915/intel_lrc.h
> rename to drivers/gpu/drm/i915/gt/intel_lrc.h
> index 99f75ee9d087..1a33ec74af8c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -24,8 +24,7 @@
>  #ifndef _INTEL_LRC_H_
>  #define _INTEL_LRC_H_
>  
> -#include "intel_ringbuffer.h"
> -#include "i915_gem_context.h"
> +#include "intel_engine.h"
>  
>  /* Execlists regs */
>  #define RING_ELSP(base)                                _MMIO((base) + 0x230)
> @@ -99,7 +98,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
>  struct drm_printer;
>  
>  struct drm_i915_private;
> -struct i915_gem_context;
>  
>  void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_lrc_reg.h
> rename to drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_mocs.c
> rename to drivers/gpu/drm/i915/gt/intel_mocs.c
> index 274ba78500c0..79df66022d3a 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -20,9 +20,11 @@
>   * SOFTWARE.
>   */
>  
> +#include "i915_drv.h"
> +
> +#include "intel_engine.h"
>  #include "intel_mocs.h"
>  #include "intel_lrc.h"
> -#include "intel_ringbuffer.h"
>  
>  /* structures required */
>  struct drm_i915_mocs_entry {
> diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> similarity index 97%
> rename from drivers/gpu/drm/i915/intel_mocs.h
> rename to drivers/gpu/drm/i915/gt/intel_mocs.h
> index 3d99d1271b2b..0913704a1af2 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> @@ -49,7 +49,9 @@
>   * context handling keep the MOCS in step.
>   */
>  
> -#include "i915_drv.h"
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_engine_cs;
>  
>  int intel_rcs_context_init_mocs(struct i915_request *rq);
>  void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/i915_reset.c
> rename to drivers/gpu/drm/i915/gt/intel_reset.c
> index 677d59304e78..9731a2295639 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -9,7 +9,7 @@
>  
>  #include "i915_drv.h"
>  #include "i915_gpu_error.h"
> -#include "i915_reset.h"
> +#include "intel_reset.h"
>  
>  #include "intel_guc.h"
>  
> diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> similarity index 98%
> rename from drivers/gpu/drm/i915/i915_reset.h
> rename to drivers/gpu/drm/i915/gt/intel_reset.h
> index 3c0450289b8f..8e662bb43a9b 100644
> --- a/drivers/gpu/drm/i915/i915_reset.h
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> @@ -11,7 +11,7 @@
>  #include <linux/types.h>
>  #include <linux/srcu.h>
>  
> -#include "intel_engine_types.h"
> +#include "gt/intel_engine_types.h"
>  
>  struct drm_i915_private;
>  struct i915_request;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_ringbuffer.c
> rename to drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index 029fd8ec1857..c1214fd25702 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -33,9 +33,8 @@
>  
>  #include "i915_drv.h"
>  #include "i915_gem_render_state.h"
> -#include "i915_reset.h"
>  #include "i915_trace.h"
> -#include "intel_drv.h"
> +#include "intel_reset.h"
>  #include "intel_workarounds.h"
>  
>  /* Rough estimate of the typical request size, performing a flush,
> diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_sseu.c
> rename to drivers/gpu/drm/i915/gt/intel_sseu.c
> diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_sseu.h
> rename to drivers/gpu/drm/i915/gt/intel_sseu.h
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/intel_workarounds.c
> rename to drivers/gpu/drm/i915/gt/intel_workarounds.c
> index b3cbed1ee1c9..f46ed0e2f07c 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1398,5 +1398,5 @@ int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> -#include "selftests/intel_workarounds.c"
> +#include "selftest_workarounds.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
> similarity index 88%
> rename from drivers/gpu/drm/i915/intel_workarounds.h
> rename to drivers/gpu/drm/i915/gt/intel_workarounds.h
> index fdf7ebb90f28..3761a6ee58bb 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.h
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
> @@ -4,13 +4,17 @@
>   * Copyright © 2014-2018 Intel Corporation
>   */
>  
> -#ifndef _I915_WORKAROUNDS_H_
> -#define _I915_WORKAROUNDS_H_
> +#ifndef _INTEL_WORKAROUNDS_H_
> +#define _INTEL_WORKAROUNDS_H_
>  
>  #include <linux/slab.h>
>  
>  #include "intel_workarounds_types.h"
>  
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_engine_cs;
> +
>  static inline void intel_wa_list_free(struct i915_wa_list *wal)
>  {
>         kfree(wal->list);
> diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> similarity index 100%
> rename from drivers/gpu/drm/i915/intel_workarounds_types.h
> rename to drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> similarity index 97%
> rename from drivers/gpu/drm/i915/selftests/mock_engine.c
> rename to drivers/gpu/drm/i915/gt/mock_engine.c
> index 61a8206ed677..414afd2f27fe 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -22,8 +22,11 @@
>   *
>   */
>  
> +#include "i915_drv.h"
> +#include "intel_context.h"
> +
>  #include "mock_engine.h"
> -#include "mock_request.h"
> +#include "selftests/mock_request.h"
>  
>  struct mock_ring {
>         struct intel_ring base;
> @@ -268,8 +271,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>         timer_setup(&engine->hw_delay, hw_delay_complete, 0);
>         INIT_LIST_HEAD(&engine->hw_queue);
>  
> -       if (pin_context(i915->kernel_context, &engine->base,
> -                       &engine->base.kernel_context))
> +       engine->base.kernel_context =
> +               intel_context_pin(i915->kernel_context, &engine->base);
> +       if (IS_ERR(engine->base.kernel_context))
>                 goto err_breadcrumbs;
>  
>         return &engine->base;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
> similarity index 98%
> rename from drivers/gpu/drm/i915/selftests/mock_engine.h
> rename to drivers/gpu/drm/i915/gt/mock_engine.h
> index b9cc3a245f16..44b35a85e9d1 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.h
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.h
> @@ -29,7 +29,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/timer.h>
>  
> -#include "../intel_ringbuffer.h"
> +#include "gt/intel_engine.h"
>  
>  struct mock_engine {
>         struct intel_engine_cs base;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> similarity index 100%
> rename from drivers/gpu/drm/i915/selftests/intel_engine_cs.c
> rename to drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> rename to drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 050bd1e19e02..87c26920212f 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -24,14 +24,14 @@
>  
>  #include <linux/kthread.h>
>  
> -#include "../i915_selftest.h"
> -#include "i915_random.h"
> -#include "igt_flush_test.h"
> -#include "igt_reset.h"
> -#include "igt_wedge_me.h"
> -
> -#include "mock_context.h"
> -#include "mock_drm.h"
> +#include "i915_selftest.h"
> +#include "selftests/i915_random.h"
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/igt_reset.h"
> +#include "selftests/igt_wedge_me.h"
> +
> +#include "selftests/mock_context.h"
> +#include "selftests/mock_drm.h"
>  
>  #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> similarity index 99%
> rename from drivers/gpu/drm/i915/selftests/intel_lrc.c
> rename to drivers/gpu/drm/i915/gt/selftest_lrc.c
> index fbee030db940..cd0551f97c2f 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -6,15 +6,13 @@
>  
>  #include <linux/prime_numbers.h>
>  
> -#include "../i915_reset.h"
> -
> -#include "../i915_selftest.h"
> -#include "igt_flush_test.h"
> -#include "igt_live_test.h"
> -#include "igt_spinner.h"
> -#include "i915_random.h"
> -
> -#include "mock_context.h"
> +#include "gt/intel_reset.h"
> +#include "i915_selftest.h"
> +#include "selftests/i915_random.h"
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/igt_live_test.h"
> +#include "selftests/igt_spinner.h"
> +#include "selftests/mock_context.h"
>  
>  static int live_sanitycheck(void *arg)
>  {
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> similarity index 98%
> rename from drivers/gpu/drm/i915/selftests/intel_workarounds.c
> rename to drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 6f941c31dcab..96c6282f3a10 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -4,15 +4,15 @@
>   * Copyright © 2018 Intel Corporation
>   */
>  
> -#include "../i915_selftest.h"
> -#include "../i915_reset.h"
> -
> -#include "igt_flush_test.h"
> -#include "igt_reset.h"
> -#include "igt_spinner.h"
> -#include "igt_wedge_me.h"
> -#include "mock_context.h"
> -#include "mock_drm.h"
> +#include "i915_selftest.h"
> +#include "intel_reset.h"
> +
> +#include "selftests/igt_flush_test.h"
> +#include "selftests/igt_reset.h"
> +#include "selftests/igt_spinner.h"
> +#include "selftests/igt_wedge_me.h"
> +#include "selftests/mock_context.h"
> +#include "selftests/mock_drm.h"
>  
>  static const struct wo_register {
>         enum intel_platform platform;
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 503d548a55f7..e9fadcb4d592 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -25,8 +25,9 @@
>   *
>   */
>  
> +#include "gt/intel_engine.h"
> +
>  #include "i915_drv.h"
> -#include "intel_ringbuffer.h"
>  
>  /**
>   * DOC: batch buffer command parser
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5823ffb17821..3f039758b152 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -32,7 +32,8 @@
>  #include <drm/drm_debugfs.h>
>  #include <drm/drm_fourcc.h>
>  
> -#include "i915_reset.h"
> +#include "gt/intel_reset.h"
> +
>  #include "intel_dp.h"
>  #include "intel_drv.h"
>  #include "intel_fbc.h"
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 1ad88e6d7c04..98b997526daa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -47,10 +47,12 @@
>  #include <drm/drm_probe_helper.h>
>  #include <drm/i915_drm.h>
>  
> +#include "gt/intel_workarounds.h"
> +#include "gt/intel_reset.h"
> +
>  #include "i915_drv.h"
>  #include "i915_pmu.h"
>  #include "i915_query.h"
> -#include "i915_reset.h"
>  #include "i915_trace.h"
>  #include "i915_vgpu.h"
>  #include "intel_audio.h"
> @@ -62,7 +64,6 @@
>  #include "intel_pm.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
> -#include "intel_workarounds.h"
>  
>  static struct drm_driver driver;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7b5da9eddc1c..fad5306f07da 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -62,18 +62,19 @@
>  #include "i915_reg.h"
>  #include "i915_utils.h"
>  
> +#include "gt/intel_lrc.h"
> +#include "gt/intel_engine.h"
> +#include "gt/intel_workarounds.h"
> +
>  #include "intel_bios.h"
>  #include "intel_device_info.h"
>  #include "intel_display.h"
>  #include "intel_dpll_mgr.h"
>  #include "intel_frontbuffer.h"
> -#include "intel_lrc.h"
>  #include "intel_opregion.h"
> -#include "intel_ringbuffer.h"
>  #include "intel_uc.h"
>  #include "intel_uncore.h"
>  #include "intel_wopcm.h"
> -#include "intel_workarounds.h"
>  
>  #include "i915_gem.h"
>  #include "i915_gem_context.h"
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a5412323fee1..9554960977a3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -39,19 +39,20 @@
>  #include <linux/dma-buf.h>
>  #include <linux/mman.h>
>  
> +#include "gt/intel_mocs.h"
> +#include "gt/intel_reset.h"
> +#include "gt/intel_workarounds.h"
> +
>  #include "i915_drv.h"
>  #include "i915_gem_clflush.h"
>  #include "i915_gemfs.h"
>  #include "i915_globals.h"
> -#include "i915_reset.h"
>  #include "i915_trace.h"
>  #include "i915_vgpu.h"
>  
>  #include "intel_drv.h"
>  #include "intel_frontbuffer.h"
> -#include "intel_mocs.h"
>  #include "intel_pm.h"
> -#include "intel_workarounds.h"
>  
>  static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c02a30612df9..37dff694456c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,13 +86,16 @@
>   */
>  
>  #include <linux/log2.h>
> +
>  #include <drm/i915_drm.h>
> +
> +#include "gt/intel_lrc_reg.h"
> +#include "gt/intel_workarounds.h"
> +
>  #include "i915_drv.h"
>  #include "i915_globals.h"
>  #include "i915_trace.h"
>  #include "i915_user_extensions.h"
> -#include "intel_lrc_reg.h"
> -#include "intel_workarounds.h"
>  
>  #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
>  #define I915_CONTEXT_PARAM_VM 0x9
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 23dcb01bfd82..cec278ab04e2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -27,9 +27,10 @@
>  
>  #include "i915_gem_context_types.h"
>  
> +#include "gt/intel_context.h"
> +
>  #include "i915_gem.h"
>  #include "i915_scheduler.h"
> -#include "intel_context.h"
>  #include "intel_device_info.h"
>  
>  struct drm_device;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index e2ec58b10fb2..d282a6ab3b9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -17,8 +17,9 @@
>  #include <linux/rcupdate.h>
>  #include <linux/types.h>
>  
> +#include "gt/intel_context_types.h"
> +
>  #include "i915_scheduler.h"
> -#include "intel_context_types.h"
>  
>  struct pid;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8f460cc4cc1f..aab778728ea2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -37,7 +37,6 @@
>  
>  #include "i915_drv.h"
>  #include "i915_vgpu.h"
> -#include "i915_reset.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include "intel_frontbuffer.h"
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index f597f35b109b..c8d96e91f3dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -38,8 +38,8 @@
>  #include <linux/mm.h>
>  #include <linux/pagevec.h>
>  
> +#include "gt/intel_reset.h"
>  #include "i915_request.h"
> -#include "i915_reset.h"
>  #include "i915_selftest.h"
>  #include "i915_timeline.h"
>  
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 5dc761e85d9d..b419d0f59275 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -13,8 +13,9 @@
>  
>  #include <drm/drm_mm.h>
>  
> +#include "gt/intel_engine.h"
> +
>  #include "intel_device_info.h"
> -#include "intel_ringbuffer.h"
>  #include "intel_uc_fw.h"
>  
>  #include "i915_gem.h"
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 56da457bed21..a87f790335c1 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -195,6 +195,8 @@
>  #include <linux/sizes.h>
>  #include <linux/uuid.h>
>  
> +#include "gt/intel_lrc_reg.h"
> +
>  #include "i915_drv.h"
>  #include "i915_oa_hsw.h"
>  #include "i915_oa_bdw.h"
> @@ -210,7 +212,6 @@
>  #include "i915_oa_cflgt3.h"
>  #include "i915_oa_cnl.h"
>  #include "i915_oa_icl.h"
> -#include "intel_lrc_reg.h"
>  
>  /* HW requires this to be a power of two, between 128k and 16M, though driver
>   * is currently generally designed assuming the largest 16M size is used such
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 46a52da3db29..35e502481f29 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -6,8 +6,10 @@
>  
>  #include <linux/irq.h>
>  #include <linux/pm_runtime.h>
> +
> +#include "gt/intel_engine.h"
> +
>  #include "i915_pmu.h"
> -#include "intel_ringbuffer.h"
>  #include "i915_drv.h"
>  
>  /* Frequency for the sampling timer for events which need it. */
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index e0efc334463b..74ae698c1f95 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -32,7 +32,6 @@
>  #include "i915_active.h"
>  #include "i915_drv.h"
>  #include "i915_globals.h"
> -#include "i915_reset.h"
>  #include "intel_pm.h"
>  
>  struct execute_cb {
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index f1af3916a808..166a457884b2 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -9,8 +9,8 @@
>  
>  #include <linux/list.h>
>  
> +#include "gt/intel_engine_types.h"
>  #include "i915_priolist_types.h"
> -#include "intel_engine_types.h"
>  
>  struct drm_i915_private;
>  struct i915_request;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 12893304c8f8..b5286f3d8146 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -8,9 +8,10 @@
>  
>  #include <drm/drm_drv.h>
>  
> +#include "gt/intel_engine.h"
> +
>  #include "i915_drv.h"
>  #include "intel_drv.h"
> -#include "intel_ringbuffer.h"
>  
>  #undef TRACE_SYSTEM
>  #define TRACE_SYSTEM i915
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 36726392e737..d4d308b6d1d8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -22,11 +22,12 @@
>   *
>   */
>  
> +#include "gt/intel_engine.h"
> +
>  #include "i915_vma.h"
>  
>  #include "i915_drv.h"
>  #include "i915_globals.h"
> -#include "intel_ringbuffer.h"
>  #include "intel_frontbuffer.h"
>  
>  #include <drm/drm_gem.h>
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 3045e0dee2a1..aa89a9adeffb 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -27,9 +27,11 @@
>  
>  #include <uapi/drm/i915_drm.h>
>  
> -#include "intel_engine_types.h"
> +#include "gt/intel_engine_types.h"
> +#include "gt/intel_context_types.h"
> +#include "gt/intel_sseu.h"
> +
>  #include "intel_display.h"
> -#include "intel_sseu.h"
>  
>  struct drm_printer;
>  struct drm_i915_private;
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 3bd40a4a6739..24e70d46b872 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -46,7 +46,6 @@
>  
>  #include "i915_drv.h"
>  #include "i915_gem_clflush.h"
> -#include "i915_reset.h"
>  #include "i915_trace.h"
>  #include "intel_atomic_plane.h"
>  #include "intel_color.h"
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 37f60cb8e9e1..1b6d6403ee92 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -25,8 +25,9 @@
>  #include <linux/circ_buf.h>
>  #include <trace/events/dma_fence.h>
>  
> +#include "gt/intel_lrc_reg.h"
> +
>  #include "intel_guc_submission.h"
> -#include "intel_lrc_reg.h"
>  #include "i915_drv.h"
>  
>  #define GUC_PREEMPT_FINISHED           0x1
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
> index aa5e6749c925..7d823a513b9c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.h
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.h
> @@ -27,9 +27,10 @@
>  
>  #include <linux/spinlock.h>
>  
> +#include "gt/intel_engine_types.h"
> +
>  #include "i915_gem.h"
>  #include "i915_selftest.h"
> -#include "intel_engine_types.h"
>  
>  struct drm_i915_private;
>  
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 25b80ffe71ad..13f823ff8083 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -22,11 +22,11 @@
>   *
>   */
>  
> +#include "gt/intel_reset.h"
>  #include "intel_uc.h"
>  #include "intel_guc_submission.h"
>  #include "intel_guc.h"
>  #include "i915_drv.h"
> -#include "i915_reset.h"
>  
>  static void guc_free_load_err_log(struct intel_guc *guc);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index e1cb22f03e8e..6f52ca881173 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -24,8 +24,9 @@
>  
>  #include <linux/prime_numbers.h>
>  
> -#include "../i915_reset.h"
> -#include "../i915_selftest.h"
> +#include "gt/intel_reset.h"
> +#include "i915_selftest.h"
> +
>  #include "i915_random.h"
>  #include "igt_flush_test.h"
>  #include "igt_live_test.h"
> diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
> index 208a966da8ca..4f31b137c428 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_reset.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
> @@ -6,8 +6,9 @@
>  
>  #include "igt_reset.h"
>  
> +#include "gt/intel_engine.h"
> +
>  #include "../i915_drv.h"
> -#include "../intel_ringbuffer.h"
>  
>  void igt_global_reset_lock(struct drm_i915_private *i915)
>  {
> diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
> index 391777c76dc7..d312e7cdab68 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
> +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
> @@ -9,9 +9,10 @@
>  
>  #include "../i915_selftest.h"
>  
> +#include "gt/intel_engine.h"
> +
>  #include "../i915_drv.h"
>  #include "../i915_request.h"
> -#include "../intel_ringbuffer.h"
>  #include "../i915_gem_context.h"
>  
>  struct igt_spinner {
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 60bbf8b4df40..f444ee5add27 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -25,7 +25,8 @@
>  #include <linux/pm_domain.h>
>  #include <linux/pm_runtime.h>
>  
> -#include "mock_engine.h"
> +#include "gt/mock_engine.h"
> +
>  #include "mock_context.h"
>  #include "mock_request.h"
>  #include "mock_gem_device.h"
> diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
> index d1a7c9608712..f739ba63057f 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_request.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_request.c
> @@ -22,7 +22,8 @@
>   *
>   */
>  
> -#include "mock_engine.h"
> +#include "gt/mock_engine.h"
> +
>  #include "mock_request.h"
>  
>  struct i915_request *
> -- 
> 2.20.1
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy
  2019-04-17  7:56 ` [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy Chris Wilson
@ 2019-04-18 12:42   ` Tvrtko Ursulin
  2019-04-18 13:07     ` Chris Wilson
  2019-04-23 13:02   ` Tvrtko Ursulin
  1 sibling, 1 reply; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-18 12:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> In the current scheme, on submitting a request we take a single global
> GEM wakeref, which trickles down to wake up all GT power domains. This
> is undesirable as we would like to be able to localise our power
> management to the available power domains and to remove the global GEM
> operations from the heart of the driver. (The intent there is to push
> global GEM decisions to the boundary as used by the GEM user interface.)
> 
> Now during request construction, each request is responsible, via its
> logical context, for acquiring a wakeref on each power domain it intends to
> utilize. Currently, each request takes a wakeref on the engine(s) and
> the engines themselves take a chipset wakeref. This gives us a
> transition on each engine which we can extend if we want to insert more
> power management control (such as soft rc6). The global GEM operations
> that currently require a struct_mutex are reduced to listening to pm
> events from the chipset GT wakeref. As we reduce the struct_mutex
> requirement, these listeners should evaporate.
> 
> Perhaps the biggest immediate change is that this removes the
> struct_mutex requirement around GT power management, allowing us greater
> flexibility in request construction. Another important knock-on effect
> is that by tracking engine usage, we can insert a switch back to the
> kernel context on that engine immediately, avoiding any extra delay and
> the need for global synchronisation barriers. This makes tracking when an
> engine and its associated contexts are idle much easier -- important for
> when we forgo our assumed execution ordering and need idle barriers to
> unpin used contexts. In the process, it means we remove a large chunk of
> code whose only purpose was to switch back to the kernel context.
> 
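
As a reading aid, a minimal sketch of the refcounted park/unpark
pattern this patch instantiates per-engine and per-GT (my own
illustration of what intel_wakeref.h provides, not the literal code;
details may differ): the 0 -> 1 get runs an unpark callback before
the count is published, and the 1 -> 0 put runs a park callback
under the same mutex.

	#include <linux/atomic.h>
	#include <linux/mutex.h>

	struct wakeref_sketch {
		atomic_t count;
		struct mutex mutex;
	};

	static int wakeref_sketch_get(struct wakeref_sketch *wf,
				      int (*unpark)(struct wakeref_sketch *wf))
	{
		int err = 0;

		if (atomic_inc_not_zero(&wf->count))
			return 0; /* fast path: already awake */

		mutex_lock(&wf->mutex);
		if (!atomic_read(&wf->count)) {
			err = unpark(wf); /* power up before publishing */
			if (!err)
				atomic_set(&wf->count, 1);
		} else {
			atomic_inc(&wf->count);
		}
		mutex_unlock(&wf->mutex);

		return err;
	}

	static void wakeref_sketch_put(struct wakeref_sketch *wf,
				       int (*park)(struct wakeref_sketch *wf))
	{
		if (!atomic_dec_and_mutex_lock(&wf->count, &wf->mutex))
			return; /* still in use */

		park(wf); /* last reference: power down (error dropped) */
		mutex_unlock(&wf->mutex);
	}

Each layer's unpark callback then simply takes a reference on the
layer below -- intel_engine_unpark() calls intel_gt_pm_get(), and
intel_gt_unpark() grabs the POWER_DOMAIN_GT_IRQ power reference --
which is what inverts the old top-down hierarchy.
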
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Imre Deak <imre.deak@intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   2 +
>   drivers/gpu/drm/i915/gt/intel_context.c       |  18 +-
>   drivers/gpu/drm/i915/gt/intel_engine.h        |   9 +-
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 142 +---------
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 153 ++++++++++
>   drivers/gpu/drm/i915/gt/intel_engine_pm.h     |  20 ++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +-
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c         | 143 ++++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_pm.h         |  27 ++
>   drivers/gpu/drm/i915/gt/intel_hangcheck.c     |   7 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |   6 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c         | 101 +------
>   drivers/gpu/drm/i915/gt/intel_reset.h         |   1 -
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  16 +-
>   drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +
>   drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  49 +---
>   .../gpu/drm/i915/gt/selftest_workarounds.c    |   5 +-
>   drivers/gpu/drm/i915/i915_debugfs.c           |  16 +-
>   drivers/gpu/drm/i915/i915_drv.c               |   5 +-
>   drivers/gpu/drm/i915/i915_drv.h               |   8 +-
>   drivers/gpu/drm/i915/i915_gem.c               |  41 +--
>   drivers/gpu/drm/i915/i915_gem.h               |   3 -
>   drivers/gpu/drm/i915/i915_gem_context.c       |  85 +-----
>   drivers/gpu/drm/i915/i915_gem_context.h       |   4 -
>   drivers/gpu/drm/i915/i915_gem_evict.c         |  47 +---
>   drivers/gpu/drm/i915/i915_gem_pm.c            | 264 ++++++------------
>   drivers/gpu/drm/i915/i915_gem_pm.h            |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.h         |   4 -
>   drivers/gpu/drm/i915/i915_request.c           |  10 +-
>   drivers/gpu/drm/i915/i915_request.h           |   2 +-
>   drivers/gpu/drm/i915/intel_uc.c               |  22 +-
>   drivers/gpu/drm/i915/intel_uc.h               |   2 +-
>   drivers/gpu/drm/i915/selftests/i915_gem.c     |  16 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 114 +-------
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  29 +-
>   .../gpu/drm/i915/selftests/igt_flush_test.c   |  32 ++-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  15 +-
>   37 files changed, 598 insertions(+), 833 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.h
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 858642c7bc40..dd8d923aa1c6 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -71,6 +71,8 @@ gt-y += \
>   	gt/intel_breadcrumbs.o \
>   	gt/intel_context.o \
>   	gt/intel_engine_cs.o \
> +	gt/intel_engine_pm.o \
> +	gt/intel_gt_pm.o \
>   	gt/intel_hangcheck.o \
>   	gt/intel_lrc.o \
>   	gt/intel_reset.o \
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 4410e20e8e13..298e463ad082 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -10,6 +10,7 @@
>   
>   #include "intel_context.h"
>   #include "intel_engine.h"
> +#include "intel_engine_pm.h"
>   
>   static struct i915_global_context {
>   	struct i915_global base;
> @@ -162,7 +163,11 @@ intel_context_pin(struct i915_gem_context *ctx,
>   		return ERR_PTR(-EINTR);
>   
>   	if (likely(!atomic_read(&ce->pin_count))) {
> -		err = ce->ops->pin(ce);
> +		intel_wakeref_t wakeref;
> +
> +		err = 0;
> +		with_intel_runtime_pm(ce->engine->i915, wakeref)
> +			err = ce->ops->pin(ce);
>   		if (err)
>   			goto err;
>   
> @@ -269,17 +274,10 @@ int __init i915_global_context_init(void)
>   
>   void intel_context_enter_engine(struct intel_context *ce)
>   {
> -	struct drm_i915_private *i915 = ce->gem_context->i915;
> -
> -	if (!i915->gt.active_requests++)
> -		i915_gem_unpark(i915);
> +	intel_engine_pm_get(ce->engine);
>   }
>   
>   void intel_context_exit_engine(struct intel_context *ce)
>   {
> -	struct drm_i915_private *i915 = ce->gem_context->i915;
> -
> -	GEM_BUG_ON(!i915->gt.active_requests);
> -	if (!--i915->gt.active_requests)
> -		i915_gem_park(i915);
> +	intel_engine_pm_put(ce->engine);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 72c7c337ace9..a228dc1774d8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -382,6 +382,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
>   void intel_engine_get_instdone(struct intel_engine_cs *engine,
>   			       struct intel_instdone *instdone);
>   
> +void intel_engine_init_execlists(struct intel_engine_cs *engine);
> +
>   void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>   void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
>   
> @@ -458,19 +460,14 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
>   {
>   	if (engine->reset.reset)
>   		engine->reset.reset(engine, stalled);
> +	engine->serial++; /* contexts lost */
>   }
>   
> -void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
> -void intel_gt_resume(struct drm_i915_private *i915);
> -
>   bool intel_engine_is_idle(struct intel_engine_cs *engine);
>   bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>   
>   void intel_engine_lost_context(struct intel_engine_cs *engine);
>   
> -void intel_engines_park(struct drm_i915_private *i915);
> -void intel_engines_unpark(struct drm_i915_private *i915);
> -
>   void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>   unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 21dd3f25e641..268dfb8e16ff 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -27,6 +27,7 @@
>   #include "i915_drv.h"
>   
>   #include "intel_engine.h"
> +#include "intel_engine_pm.h"
>   #include "intel_lrc.h"
>   #include "intel_reset.h"
>   
> @@ -451,7 +452,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>   	i915_gem_batch_pool_init(&engine->batch_pool, engine);
>   }
>   
> -static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> +void intel_engine_init_execlists(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   
> @@ -584,10 +585,11 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
>   	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
>   
>   	intel_engine_init_breadcrumbs(engine);
> -	intel_engine_init_execlist(engine);
> +	intel_engine_init_execlists(engine);
>   	intel_engine_init_hangcheck(engine);
>   	intel_engine_init_batch_pool(engine);
>   	intel_engine_init_cmd_parser(engine);
> +	intel_engine_init__pm(engine);
>   
>   	/* Use the whole device by default */
>   	engine->sseu =
> @@ -758,30 +760,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>   	return ret;
>   }
>   
> -void intel_gt_resume(struct drm_i915_private *i915)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	/*
> -	 * After resume, we may need to poke into the pinned kernel
> -	 * contexts to paper over any damage caused by the sudden suspend.
> -	 * Only the kernel contexts should remain pinned over suspend,
> -	 * allowing us to fixup the user contexts on their first pin.
> -	 */
> -	for_each_engine(engine, i915, id) {
> -		struct intel_context *ce;
> -
> -		ce = engine->kernel_context;
> -		if (ce)
> -			ce->ops->reset(ce);
> -
> -		ce = engine->preempt_context;
> -		if (ce)
> -			ce->ops->reset(ce);
> -	}
> -}
> -
>   /**
>    * intel_engines_cleanup_common - cleans up the engine state created by
>    *                                the common initiailizers.
> @@ -1128,117 +1106,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>   		engine->set_default_submission(engine);
>   }
>   
> -static bool reset_engines(struct drm_i915_private *i915)
> -{
> -	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
> -		return false;
> -
> -	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
> -}
> -
> -/**
> - * intel_engines_sanitize: called after the GPU has lost power
> - * @i915: the i915 device
> - * @force: ignore a failed reset and sanitize engine state anyway
> - *
> - * Anytime we reset the GPU, either with an explicit GPU reset or through a
> - * PCI power cycle, the GPU loses state and we must reset our state tracking
> - * to match. Note that calling intel_engines_sanitize() if the GPU has not
> - * been reset results in much confusion!
> - */
> -void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	GEM_TRACE("\n");
> -
> -	if (!reset_engines(i915) && !force)
> -		return;
> -
> -	for_each_engine(engine, i915, id)
> -		intel_engine_reset(engine, false);
> -}
> -
> -/**
> - * intel_engines_park: called when the GT is transitioning from busy->idle
> - * @i915: the i915 device
> - *
> - * The GT is now idle and about to go to sleep (maybe never to wake again?).
> - * Time for us to tidy and put away our toys (release resources back to the
> - * system).
> - */
> -void intel_engines_park(struct drm_i915_private *i915)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, i915, id) {
> -		/* Flush the residual irq tasklets first. */
> -		intel_engine_disarm_breadcrumbs(engine);
> -		tasklet_kill(&engine->execlists.tasklet);
> -
> -		/*
> -		 * We are committed now to parking the engines, make sure there
> -		 * will be no more interrupts arriving later and the engines
> -		 * are truly idle.
> -		 */
> -		if (wait_for(intel_engine_is_idle(engine), 10)) {
> -			struct drm_printer p = drm_debug_printer(__func__);
> -
> -			dev_err(i915->drm.dev,
> -				"%s is not idle before parking\n",
> -				engine->name);
> -			intel_engine_dump(engine, &p, NULL);
> -		}
> -
> -		/* Must be reset upon idling, or we may miss the busy wakeup. */
> -		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> -
> -		if (engine->park)
> -			engine->park(engine);
> -
> -		if (engine->pinned_default_state) {
> -			i915_gem_object_unpin_map(engine->default_state);
> -			engine->pinned_default_state = NULL;
> -		}
> -
> -		i915_gem_batch_pool_fini(&engine->batch_pool);
> -		engine->execlists.no_priolist = false;
> -	}
> -
> -	i915->gt.active_engines = 0;
> -}
> -
> -/**
> - * intel_engines_unpark: called when the GT is transitioning from idle->busy
> - * @i915: the i915 device
> - *
> - * The GT was idle and now about to fire up with some new user requests.
> - */
> -void intel_engines_unpark(struct drm_i915_private *i915)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, i915, id) {
> -		void *map;
> -
> -		/* Pin the default state for fast resets from atomic context. */
> -		map = NULL;
> -		if (engine->default_state)
> -			map = i915_gem_object_pin_map(engine->default_state,
> -						      I915_MAP_WB);
> -		if (!IS_ERR_OR_NULL(map))
> -			engine->pinned_default_state = map;
> -
> -		if (engine->unpark)
> -			engine->unpark(engine);
> -
> -		intel_engine_init_hangcheck(engine);
> -	}
> -}
> -
>   /**
>    * intel_engine_lost_context: called when the GPU is reset into unknown state
>    * @engine: the engine
> @@ -1523,6 +1390,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	if (i915_reset_failed(engine->i915))
>   		drm_printf(m, "*** WEDGED ***\n");
>   
> +	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
>   	drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
>   		   engine->hangcheck.last_seqno,
>   		   engine->hangcheck.next_seqno,
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> new file mode 100644
> index 000000000000..cc0adfa14947
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -0,0 +1,153 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +
> +#include "intel_engine.h"
> +#include "intel_engine_pm.h"
> +#include "intel_gt_pm.h"
> +
> +static int intel_engine_unpark(struct intel_wakeref *wf)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(wf, typeof(*engine), wakeref);
> +	void *map;
> +
> +	GEM_TRACE("%s\n", engine->name);
> +
> +	intel_gt_pm_get(engine->i915);
> +
> +	/* Pin the default state for fast resets from atomic context. */
> +	map = NULL;
> +	if (engine->default_state)
> +		map = i915_gem_object_pin_map(engine->default_state,
> +					      I915_MAP_WB);
> +	if (!IS_ERR_OR_NULL(map))
> +		engine->pinned_default_state = map;
> +
> +	if (engine->unpark)
> +		engine->unpark(engine);
> +
> +	intel_engine_init_hangcheck(engine);
> +	return 0;
> +}
> +
> +void intel_engine_pm_get(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_get(engine->i915, &engine->wakeref, intel_engine_unpark);
> +}
> +
> +static bool switch_to_kernel_context(struct intel_engine_cs *engine)
> +{
> +	struct i915_request *rq;
> +
> +	/* Already inside the kernel context, safe to power down. */
> +	if (engine->wakeref_serial == engine->serial)
> +		return true;
> +
> +	/* GPU is pointing to the void, as good as in the kernel context. */
> +	if (i915_reset_failed(engine->i915))
> +		return true;
> +
> +	/*
> +	 * Note, we do this without taking the timeline->mutex. We cannot
> +	 * as we may be called while retiring the kernel context and so
> +	 * already underneath the timeline->mutex. Instead we rely on the
> +	 * exclusive property of the intel_engine_park that prevents anyone
> +	 * else from creating a request on this engine. This also requires
> +	 * that the ring is empty and we avoid any waits while constructing
> +	 * the context, as they assume protection by the timeline->mutex.
> +	 * This should hold true as we can only park the engine after
> +	 * retiring the last request, thus all rings should be empty and
> +	 * all timelines idle.
> +	 */
> +	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
> +	if (IS_ERR(rq))
> +		/* Context switch failed, hope for the best! Maybe reset? */
> +		return true;
> +
> +	/* Check again on the next retirement. */
> +	engine->wakeref_serial = engine->serial + 1;

Is engine->serial guaranteed to be stable at this point? I guess so 
since there can only be one park at a time.

> +	__i915_request_commit(rq);
> +
> +	return false;
> +}
> +
> +static int intel_engine_park(struct intel_wakeref *wf)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(wf, typeof(*engine), wakeref);
> +
> +	/*
> +	 * If one and only one request is completed between pm events,
> +	 * we know that we are inside the kernel context and it is
> +	 * safe to power down. (We are paranoid in case that runtime
> +	 * suspend causes corruption to the active context image, and
> +	 * want to avoid that impacting userspace.)
> +	 */
> +	if (!switch_to_kernel_context(engine))
> +		return -EBUSY;

But it is ignored by intel_engine_pm_put. Should it be a WARN_ON or 
something?
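
Something along these lines, say (untested, just to make the
question concrete; it assumes intel_wakeref_put() propagates the
park callback's return code -- and it may well be that -EBUSY is an
expected outcome here rather than an error):

	void intel_engine_pm_put(struct intel_engine_cs *engine)
	{
		/* Make a failed park visible instead of dropping it. */
		WARN_ON(intel_wakeref_put(engine->i915, &engine->wakeref,
					  intel_engine_park));
	}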

> +
> +	GEM_TRACE("%s\n", engine->name);
> +
> +	intel_engine_disarm_breadcrumbs(engine);
> +
> +	/* Must be reset upon idling, or we may miss the busy wakeup. */
> +	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> +
> +	if (engine->park)
> +		engine->park(engine);
> +
> +	if (engine->pinned_default_state) {
> +		i915_gem_object_unpin_map(engine->default_state);
> +		engine->pinned_default_state = NULL;
> +	}
> +
> +	engine->execlists.no_priolist = false;
> +
> +	intel_gt_pm_put(engine->i915);
> +	return 0;
> +}
> +
> +void intel_engine_pm_put(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_put(engine->i915, &engine->wakeref, intel_engine_park);
> +}
> +
> +void intel_engine_init__pm(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_init(&engine->wakeref);
> +}
> +
> +int intel_engines_resume(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err = 0;
> +
> +	/*
> +	 * After resume, we may need to poke into the pinned kernel
> +	 * contexts to paper over any damage caused by the sudden suspend.
> +	 * Only the kernel contexts should remain pinned over suspend,
> +	 * allowing us to fixup the user contexts on their first pin.
> +	 */
> +	intel_gt_pm_get(i915);
> +	for_each_engine(engine, i915, id) {
> +		intel_engine_pm_get(engine);
> +		engine->serial++; /* kernel context lost */
> +		err = engine->resume(engine);
> +		intel_engine_pm_put(engine);
> +		if (err) {
> +			dev_err(i915->drm.dev,
> +				"Failed to restart %s (%d)\n",
> +				engine->name, err);
> +			break;
> +		}
> +	}
> +	intel_gt_pm_put(i915);
> +
> +	return err;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> new file mode 100644
> index 000000000000..143ac90ba117
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> @@ -0,0 +1,20 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_ENGINE_PM_H
> +#define INTEL_ENGINE_PM_H
> +
> +struct drm_i915_private;
> +struct intel_engine_cs;
> +
> +void intel_engine_pm_get(struct intel_engine_cs *engine);
> +void intel_engine_pm_put(struct intel_engine_cs *engine);
> +
> +void intel_engine_init__pm(struct intel_engine_cs *engine);
> +
> +int intel_engines_resume(struct drm_i915_private *i915);
> +
> +#endif /* INTEL_ENGINE_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 3adf58da6d2c..d972c339309c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -20,6 +20,7 @@
>   #include "i915_selftest.h"
>   #include "i915_timeline_types.h"
>   #include "intel_sseu.h"
> +#include "intel_wakeref.h"
>   #include "intel_workarounds_types.h"
>   
>   #define I915_MAX_SLICES	3
> @@ -287,6 +288,10 @@ struct intel_engine_cs {
>   	struct intel_context *kernel_context; /* pinned */
>   	struct intel_context *preempt_context; /* pinned; optional */
>   
> +	unsigned long serial;
> +
> +	unsigned long wakeref_serial;
> +	struct intel_wakeref wakeref;
>   	struct drm_i915_gem_object *default_state;
>   	void *pinned_default_state;
>   
> @@ -359,7 +364,7 @@ struct intel_engine_cs {
>   	void		(*irq_enable)(struct intel_engine_cs *engine);
>   	void		(*irq_disable)(struct intel_engine_cs *engine);
>   
> -	int		(*init_hw)(struct intel_engine_cs *engine);
> +	int		(*resume)(struct intel_engine_cs *engine);
>   
>   	struct {
>   		void (*prepare)(struct intel_engine_cs *engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> new file mode 100644
> index 000000000000..ae7155f0e063
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -0,0 +1,143 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_gt_pm.h"
> +#include "intel_pm.h"
> +#include "intel_wakeref.h"
> +
> +static void pm_notify(struct drm_i915_private *i915, int state)
> +{
> +	blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
> +}
> +
> +static int intel_gt_unpark(struct intel_wakeref *wf)
> +{
> +	struct drm_i915_private *i915 =
> +		container_of(wf, typeof(*i915), gt.wakeref);
> +
> +	GEM_TRACE("\n");
> +
> +	/*
> +	 * It seems that the DMC likes to transition between the DC states a lot
> +	 * when there are no connected displays (no active power domains) during
> +	 * command submission.
> +	 *
> +	 * This activity has negative impact on the performance of the chip with
> +	 * huge latencies observed in the interrupt handler and elsewhere.
> +	 *
> +	 * Work around it by grabbing a GT IRQ power domain whilst there is any
> +	 * GT activity, preventing any DC state transitions.
> +	 */
> +	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> +	GEM_BUG_ON(!i915->gt.awake);
> +
> +	intel_enable_gt_powersave(i915);
> +
> +	i915_update_gfx_val(i915);
> +	if (INTEL_GEN(i915) >= 6)
> +		gen6_rps_busy(i915);
> +
> +	i915_pmu_gt_unparked(i915);
> +
> +	i915_queue_hangcheck(i915);
> +
> +	pm_notify(i915, INTEL_GT_UNPARK);
> +
> +	return 0;
> +}
> +
> +void intel_gt_pm_get(struct drm_i915_private *i915)
> +{
> +	intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark);
> +}
> +
> +static int intel_gt_park(struct intel_wakeref *wf)
> +{
> +	struct drm_i915_private *i915 =
> +		container_of(wf, typeof(*i915), gt.wakeref);
> +	intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
> +
> +	GEM_TRACE("\n");
> +
> +	pm_notify(i915, INTEL_GT_PARK);
> +
> +	i915_pmu_gt_parked(i915);
> +	if (INTEL_GEN(i915) >= 6)
> +		gen6_rps_idle(i915);
> +
> +	GEM_BUG_ON(!wakeref);
> +	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
> +
> +	return 0;
> +}
> +
> +void intel_gt_pm_put(struct drm_i915_private *i915)
> +{
> +	intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park);
> +}
> +
> +void intel_gt_pm_init(struct drm_i915_private *i915)
> +{
> +	intel_wakeref_init(&i915->gt.wakeref);
> +	BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications);
> +}
> +
> +static bool reset_engines(struct drm_i915_private *i915)
> +{
> +	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
> +		return false;
> +
> +	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
> +}
> +
> +/**
> + * intel_gt_sanitize: called after the GPU has lost power
> + * @i915: the i915 device
> + * @force: ignore a failed reset and sanitize engine state anyway
> + *
> + * Anytime we reset the GPU, either with an explicit GPU reset or through a
> + * PCI power cycle, the GPU loses state and we must reset our state tracking
> + * to match. Note that calling intel_gt_sanitize() if the GPU has not
> + * been reset results in much confusion!
> + */
> +void intel_gt_sanitize(struct drm_i915_private *i915, bool force)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	GEM_TRACE("\n");
> +
> +	if (!reset_engines(i915) && !force)
> +		return;
> +
> +	for_each_engine(engine, i915, id)
> +		intel_engine_reset(engine, false);
> +}
> +
> +void intel_gt_resume(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	/*
> +	 * After resume, we may need to poke into the pinned kernel
> +	 * contexts to paper over any damage caused by the sudden suspend.
> +	 * Only the kernel contexts should remain pinned over suspend,
> +	 * allowing us to fixup the user contexts on their first pin.
> +	 */
> +	for_each_engine(engine, i915, id) {
> +		struct intel_context *ce;
> +
> +		ce = engine->kernel_context;
> +		if (ce)
> +			ce->ops->reset(ce);
> +
> +		ce = engine->preempt_context;
> +		if (ce)
> +			ce->ops->reset(ce);
> +	}
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> new file mode 100644
> index 000000000000..7dd1130a19a4
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -0,0 +1,27 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_GT_PM_H
> +#define INTEL_GT_PM_H
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_private;
> +
> +enum {
> +	INTEL_GT_UNPARK,
> +	INTEL_GT_PARK,
> +};
> +
> +void intel_gt_pm_get(struct drm_i915_private *i915);
> +void intel_gt_pm_put(struct drm_i915_private *i915);
> +
> +void intel_gt_pm_init(struct drm_i915_private *i915);
> +
> +void intel_gt_sanitize(struct drm_i915_private *i915, bool force);
> +void intel_gt_resume(struct drm_i915_private *i915);
> +
> +#endif /* INTEL_GT_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index 3053a706a561..e5eaa06fe74d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -256,6 +256,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
>   	unsigned int hung = 0, stuck = 0, wedged = 0;
> +	intel_wakeref_t wakeref;
>   
>   	if (!i915_modparams.enable_hangcheck)
>   		return;
> @@ -266,6 +267,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	if (i915_terminally_wedged(dev_priv))
>   		return;
>   
> +	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	if (!wakeref)
> +		return;
> +
>   	/* As enabling the GPU requires fairly extensive mmio access,
>   	 * periodically arm the mmio checker to see if we are triggering
>   	 * any invalid access.
> @@ -313,6 +318,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	if (hung)
>   		hangcheck_declare_hang(dev_priv, hung, stuck);
>   
> +	intel_runtime_pm_put(dev_priv, wakeref);
> +
>   	/* Reset timer in case GPU hangs without another request being added */
>   	i915_queue_hangcheck(dev_priv);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index edec7f183688..d17c08e26935 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1789,7 +1789,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
>   	return unexpected;
>   }
>   
> -static int gen8_init_common_ring(struct intel_engine_cs *engine)
> +static int execlists_resume(struct intel_engine_cs *engine)
>   {
>   	intel_engine_apply_workarounds(engine);
>   	intel_engine_apply_whitelist(engine);
> @@ -1822,7 +1822,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>   	 * completed the reset in i915_gem_reset_finish(). If a request
>   	 * is completed by one engine, it may then queue a request
>   	 * to a second via its execlists->tasklet *just* as we are
> -	 * calling engine->init_hw() and also writing the ELSP.
> +	 * calling engine->resume() and also writing the ELSP.
>   	 * Turning off the execlists->tasklet until the reset is over
>   	 * prevents the race.
>   	 */
> @@ -2391,7 +2391,7 @@ static void
>   logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   {
>   	/* Default vfuncs which can be overriden by each engine. */
> -	engine->init_hw = gen8_init_common_ring;
> +	engine->resume = execlists_resume;
>   
>   	engine->reset.prepare = execlists_reset_prepare;
>   	engine->reset.reset = execlists_reset;
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 996164d07397..af85723c7e2f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -9,6 +9,8 @@
>   
>   #include "i915_drv.h"
>   #include "i915_gpu_error.h"
> +#include "intel_engine_pm.h"
> +#include "intel_gt_pm.h"
>   #include "intel_reset.h"
>   
>   #include "intel_guc.h"
> @@ -683,6 +685,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
>   	 * written to the powercontext is undefined and so we may lose
>   	 * GPU state upon resume, i.e. fail to restart after a reset.
>   	 */
> +	intel_engine_pm_get(engine);
>   	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
>   	engine->reset.prepare(engine);
>   }
> @@ -718,6 +721,7 @@ static void reset_prepare(struct drm_i915_private *i915)
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
>   
> +	intel_gt_pm_get(i915);

Wouldn't it be more in the spirit of the patch to let the engines wake
up the GT, rather than grabbing it directly here?

>   	for_each_engine(engine, i915, id)
>   		reset_prepare_engine(engine);
>   
> @@ -755,48 +759,10 @@ static int gt_reset(struct drm_i915_private *i915,
>   static void reset_finish_engine(struct intel_engine_cs *engine)
>   {
>   	engine->reset.finish(engine);
> +	intel_engine_pm_put(engine);
>   	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
>   }
>   
> -struct i915_gpu_restart {
> -	struct work_struct work;
> -	struct drm_i915_private *i915;
> -};
> -
> -static void restart_work(struct work_struct *work)

Oh wow, I had not seen this part of the code before. Could you ask a
second pair of eyes to check it over?

> -{
> -	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
> -	struct drm_i915_private *i915 = arg->i915;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -
> -	wakeref = intel_runtime_pm_get(i915);
> -	mutex_lock(&i915->drm.struct_mutex);
> -	WRITE_ONCE(i915->gpu_error.restart, NULL);
> -
> -	for_each_engine(engine, i915, id) {
> -		struct i915_request *rq;
> -
> -		/*
> -		 * Ostensibily, we always want a context loaded for powersaving,
> -		 * so if the engine is idle after the reset, send a request
> -		 * to load our scratch kernel_context.
> -		 */
> -		if (!intel_engine_is_idle(engine))
> -			continue;
> -
> -		rq = i915_request_create(engine->kernel_context);
> -		if (!IS_ERR(rq))
> -			i915_request_add(rq);
> -	}
> -
> -	mutex_unlock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_put(i915, wakeref);
> -
> -	kfree(arg);
> -}
> -
>   static void reset_finish(struct drm_i915_private *i915)
>   {
>   	struct intel_engine_cs *engine;
> @@ -806,29 +772,7 @@ static void reset_finish(struct drm_i915_private *i915)
>   		reset_finish_engine(engine);
>   		intel_engine_signal_breadcrumbs(engine);
>   	}
> -}
> -
> -static void reset_restart(struct drm_i915_private *i915)
> -{
> -	struct i915_gpu_restart *arg;
> -
> -	/*
> -	 * Following the reset, ensure that we always reload context for
> -	 * powersaving, and to correct engine->last_retired_context. Since
> -	 * this requires us to submit a request, queue a worker to do that
> -	 * task for us to evade any locking here.
> -	 */
> -	if (READ_ONCE(i915->gpu_error.restart))
> -		return;
> -
> -	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
> -	if (arg) {
> -		arg->i915 = i915;
> -		INIT_WORK(&arg->work, restart_work);
> -
> -		WRITE_ONCE(i915->gpu_error.restart, arg);
> -		queue_work(i915->wq, &arg->work);
> -	}
> +	intel_gt_pm_put(i915);
>   }
>   
>   static void nop_submit_request(struct i915_request *request)
> @@ -889,6 +833,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
>   	 * in nop_submit_request.
>   	 */
>   	synchronize_rcu_expedited();
> +	set_bit(I915_WEDGED, &error->flags);
>   
>   	/* Mark all executing requests as skipped */
>   	for_each_engine(engine, i915, id)
> @@ -896,9 +841,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
>   
>   	reset_finish(i915);
>   
> -	smp_mb__before_atomic();
> -	set_bit(I915_WEDGED, &error->flags);
> -
>   	GEM_TRACE("end\n");
>   }
>   
> @@ -956,7 +898,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
>   	}
>   	mutex_unlock(&i915->gt.timelines.mutex);
>   
> -	intel_engines_sanitize(i915, false);
> +	intel_gt_sanitize(i915, false);
>   
>   	/*
>   	 * Undo nop_submit_request. We prevent all new i915 requests from
> @@ -1034,7 +976,6 @@ void i915_reset(struct drm_i915_private *i915,
>   	GEM_TRACE("flags=%lx\n", error->flags);
>   
>   	might_sleep();
> -	assert_rpm_wakelock_held(i915);
>   	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
>   
>   	/* Clear any previous failed attempts at recovery. Time to try again. */
> @@ -1087,8 +1028,6 @@ void i915_reset(struct drm_i915_private *i915,
>   
>   finish:
>   	reset_finish(i915);
> -	if (!__i915_wedged(error))
> -		reset_restart(i915);
>   	return;
>   
>   taint:
> @@ -1137,6 +1076,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>   	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
>   	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
>   
> +	if (!intel_wakeref_active(&engine->wakeref))
> +		return 0;

I guess there can't be any races here, since a stuck engine can't be 
parked. Do we have any tests which trigger this without a guilty 
request? I seem to remember that isn't possible, so probably not.

> +
>   	reset_prepare_engine(engine);
>   
>   	if (msg)
> @@ -1168,7 +1110,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>   	 * have been reset to their default values. Follow the init_ring
>   	 * process to program RING_MODE, HWSP and re-enable submission.
>   	 */
> -	ret = engine->init_hw(engine);
> +	ret = engine->resume(engine);
>   	if (ret)
>   		goto out;
>   
> @@ -1425,25 +1367,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915)
>   	return __i915_wedged(error) ? -EIO : 0;
>   }
>   
> -bool i915_reset_flush(struct drm_i915_private *i915)
> -{
> -	int err;
> -
> -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
> -
> -	flush_workqueue(i915->wq);
> -	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	err = i915_gem_wait_for_idle(i915,
> -				     I915_WAIT_LOCKED |
> -				     I915_WAIT_FOR_IDLE_BOOST,
> -				     MAX_SCHEDULE_TIMEOUT);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
> -	return !err;
> -}
> -
>   static void i915_wedge_me(struct work_struct *work)
>   {
>   	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> index 8e662bb43a9b..b52efaab4941 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.h
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> @@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine,
>   		      const char *reason);
>   
>   void i915_reset_request(struct i915_request *rq, bool guilty);
> -bool i915_reset_flush(struct drm_i915_private *i915);
>   
>   int __must_check i915_reset_trylock(struct drm_i915_private *i915);
>   void i915_reset_unlock(struct drm_i915_private *i915, int tag);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index b2bb7d4bfbe3..f164dbe90050 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -637,12 +637,15 @@ static bool stop_ring(struct intel_engine_cs *engine)
>   	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
>   }
>   
> -static int init_ring_common(struct intel_engine_cs *engine)
> +static int xcs_resume(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
>   	struct intel_ring *ring = engine->buffer;
>   	int ret = 0;
>   
> +	GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
> +		  engine->name, ring->head, ring->tail);
> +
>   	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
>   
>   	if (!stop_ring(engine)) {
> @@ -827,12 +830,9 @@ static int intel_rcs_ctx_init(struct i915_request *rq)
>   	return 0;
>   }
>   
> -static int init_render_ring(struct intel_engine_cs *engine)
> +static int rcs_resume(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
> -	int ret = init_ring_common(engine);
> -	if (ret)
> -		return ret;
>   
>   	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
>   	if (IS_GEN_RANGE(dev_priv, 4, 6))
> @@ -875,7 +875,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
>   	if (INTEL_GEN(dev_priv) >= 6)
>   		ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
>   
> -	return 0;
> +	return xcs_resume(engine);

This inverts the order between the common and rcs init. One thing which 
jumps out is the RING_IMR write, which is now done after starting the 
engine. Can we lose an interrupt now?

>   }
>   
>   static void cancel_requests(struct intel_engine_cs *engine)
> @@ -2207,7 +2207,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>   
>   	intel_ring_init_irq(dev_priv, engine);
>   
> -	engine->init_hw = init_ring_common;
> +	engine->resume = xcs_resume;
>   	engine->reset.prepare = reset_prepare;
>   	engine->reset.reset = reset_ring;
>   	engine->reset.finish = reset_finish;
> @@ -2269,7 +2269,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>   	if (IS_HASWELL(dev_priv))
>   		engine->emit_bb_start = hsw_emit_bb_start;
>   
> -	engine->init_hw = init_render_ring;
> +	engine->resume = rcs_resume;
>   
>   	ret = intel_init_ring_buffer(engine);
>   	if (ret)
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index bcfeb0c67997..a97a0ab35703 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -24,6 +24,7 @@
>   
>   #include "i915_drv.h"
>   #include "intel_context.h"
> +#include "intel_engine_pm.h"
>   
>   #include "mock_engine.h"
>   #include "selftests/mock_request.h"
> @@ -268,6 +269,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
>   
>   	intel_engine_init_breadcrumbs(&engine->base);
> +	intel_engine_init_execlists(&engine->base);
> +	intel_engine_init__pm(&engine->base);
>   
>   	/* fake hw queue */
>   	spin_lock_init(&engine->hw_lock);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 87c26920212f..6004d6907e9c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -24,6 +24,8 @@
>   
>   #include <linux/kthread.h>
>   
> +#include "intel_engine_pm.h"
> +
>   #include "i915_selftest.h"
>   #include "selftests/i915_random.h"
>   #include "selftests/igt_flush_test.h"
> @@ -479,19 +481,6 @@ static int igt_reset_nop(void *arg)
>   			break;
>   		}
>   
> -		if (!i915_reset_flush(i915)) {
> -			struct drm_printer p =
> -				drm_info_printer(i915->drm.dev);
> -
> -			pr_err("%s failed to idle after reset\n",
> -			       engine->name);
> -			intel_engine_dump(engine, &p,
> -					  "%s\n", engine->name);
> -
> -			err = -EIO;
> -			break;
> -		}
> -
>   		err = igt_flush_test(i915, 0);
>   		if (err)
>   			break;
> @@ -594,19 +583,6 @@ static int igt_reset_nop_engine(void *arg)
>   				err = -EINVAL;
>   				break;
>   			}
> -
> -			if (!i915_reset_flush(i915)) {
> -				struct drm_printer p =
> -					drm_info_printer(i915->drm.dev);
> -
> -				pr_err("%s failed to idle after reset\n",
> -				       engine->name);
> -				intel_engine_dump(engine, &p,
> -						  "%s\n", engine->name);
> -
> -				err = -EIO;
> -				break;
> -			}
>   		} while (time_before(jiffies, end_time));
>   		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>   		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
> @@ -669,6 +645,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>   		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
>   							     engine);
>   
> +		intel_engine_pm_get(engine);
>   		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>   		do {
>   			if (active) {
> @@ -721,21 +698,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>   				err = -EINVAL;
>   				break;
>   			}
> -
> -			if (!i915_reset_flush(i915)) {
> -				struct drm_printer p =
> -					drm_info_printer(i915->drm.dev);
> -
> -				pr_err("%s failed to idle after reset\n",
> -				       engine->name);
> -				intel_engine_dump(engine, &p,
> -						  "%s\n", engine->name);
> -
> -				err = -EIO;
> -				break;
> -			}
>   		} while (time_before(jiffies, end_time));
>   		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
> +		intel_engine_pm_put(engine);
>   
>   		if (err)
>   			break;
> @@ -942,6 +907,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>   			get_task_struct(tsk);
>   		}
>   
> +		intel_engine_pm_get(engine);
>   		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>   		do {
>   			struct i915_request *rq = NULL;
> @@ -1018,6 +984,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>   			}
>   		} while (time_before(jiffies, end_time));
>   		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
> +		intel_engine_pm_put(engine);
>   		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
>   			engine->name, test_name, count);
>   
> @@ -1069,7 +1036,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>   		if (err)
>   			break;
>   
> -		err = igt_flush_test(i915, 0);
> +		mutex_lock(&i915->drm.struct_mutex);
> +		err = igt_flush_test(i915, I915_WAIT_LOCKED);
> +		mutex_unlock(&i915->drm.struct_mutex);
>   		if (err)
>   			break;
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 96c6282f3a10..461d91737077 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -71,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   {
>   	const u32 base = engine->mmio_base;
>   	struct drm_i915_gem_object *result;
> -	intel_wakeref_t wakeref;
>   	struct i915_request *rq;
>   	struct i915_vma *vma;
>   	u32 srm, *cs;
> @@ -103,9 +102,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   	if (err)
>   		goto err_obj;
>   
> -	rq = ERR_PTR(-ENODEV);
> -	with_intel_runtime_pm(engine->i915, wakeref)
> -		rq = i915_request_alloc(engine, ctx);
> +	rq = i915_request_alloc(engine, ctx);
>   	if (IS_ERR(rq)) {
>   		err = PTR_ERR(rq);
>   		goto err_pin;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8dcba78fb43b..00d3ff746eb1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2041,8 +2041,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	}
>   
>   	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
> -	seq_printf(m, "GPU busy? %s [%d requests]\n",
> -		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> +	seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
>   	seq_printf(m, "Boosts outstanding? %d\n",
>   		   atomic_read(&rps->num_waiters));
>   	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
> @@ -2061,9 +2060,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   
>   	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
>   
> -	if (INTEL_GEN(dev_priv) >= 6 &&
> -	    rps->enabled &&
> -	    dev_priv->gt.active_requests) {
> +	if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) {
>   		u32 rpup, rpupei;
>   		u32 rpdown, rpdownei;
>   
> @@ -3092,9 +3089,9 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>   
>   	wakeref = intel_runtime_pm_get(dev_priv);
>   
> -	seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake));
> -	seq_printf(m, "Global active requests: %d\n",
> -		   dev_priv->gt.active_requests);
> +	seq_printf(m, "GT awake? %s [%d]\n",
> +		   yesno(dev_priv->gt.awake),
> +		   atomic_read(&dev_priv->gt.wakeref.count));
>   	seq_printf(m, "CS timestamp frequency: %u kHz\n",
>   		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
>   
> @@ -3940,8 +3937,7 @@ i915_drop_caches_set(void *data, u64 val)
>   
>   	if (val & DROP_IDLE) {
>   		do {
> -			if (READ_ONCE(i915->gt.active_requests))
> -				flush_delayed_work(&i915->gem.retire_work);
> +			flush_delayed_work(&i915->gem.retire_work);
>   			drain_delayed_work(&i915->gem.idle_work);
>   		} while (READ_ONCE(i915->gt.awake));
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 98b997526daa..c8cb70d4fe91 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -47,8 +47,9 @@
>   #include <drm/drm_probe_helper.h>
>   #include <drm/i915_drm.h>
>   
> -#include "gt/intel_workarounds.h"
> +#include "gt/intel_gt_pm.h"
>   #include "gt/intel_reset.h"
> +#include "gt/intel_workarounds.h"
>   
>   #include "i915_drv.h"
>   #include "i915_pmu.h"
> @@ -2323,7 +2324,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
>   
>   	intel_power_domains_resume(dev_priv);
>   
> -	intel_engines_sanitize(dev_priv, true);
> +	intel_gt_sanitize(dev_priv, true);
>   
>   	enable_rpm_wakeref_asserts(dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cbae9be052e0..e5ae6c36e959 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2006,10 +2006,10 @@ struct drm_i915_private {
>   			struct list_head hwsp_free_list;
>   		} timelines;
>   
> -		intel_engine_mask_t active_engines;
>   		struct list_head active_rings;
>   		struct list_head closed_vma;
> -		u32 active_requests;
> +
> +		struct intel_wakeref wakeref;
>   
>   		/**
>   		 * Is the GPU currently considered idle, or busy executing
> @@ -2020,12 +2020,16 @@ struct drm_i915_private {
>   		 */
>   		intel_wakeref_t awake;
>   
> +		struct blocking_notifier_head pm_notifications;
> +
>   		ktime_t last_init_time;
>   
>   		struct i915_vma *scratch;
>   	} gt;
>   
>   	struct {
> +		struct notifier_block pm_notifier;
> +
>   		/**
>   		 * We leave the user IRQ off as much as possible,
>   		 * but this means that requests will finish and never
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 74b99126830b..d0211271f103 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -39,6 +39,8 @@
>   #include <linux/dma-buf.h>
>   #include <linux/mman.h>
>   
> +#include "gt/intel_engine_pm.h"
> +#include "gt/intel_gt_pm.h"
>   #include "gt/intel_mocs.h"
>   #include "gt/intel_reset.h"
>   #include "gt/intel_workarounds.h"
> @@ -2911,9 +2913,6 @@ wait_for_timelines(struct drm_i915_private *i915,
>   	struct i915_gt_timelines *gt = &i915->gt.timelines;
>   	struct i915_timeline *tl;
>   
> -	if (!READ_ONCE(i915->gt.active_requests))
> -		return timeout;
> -
>   	mutex_lock(&gt->mutex);
>   	list_for_each_entry(tl, &gt->active_list, link) {
>   		struct i915_request *rq;
> @@ -2953,9 +2952,10 @@ wait_for_timelines(struct drm_i915_private *i915,
>   int i915_gem_wait_for_idle(struct drm_i915_private *i915,
>   			   unsigned int flags, long timeout)
>   {
> -	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
> +	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
>   		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
> -		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
> +		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
> +		  yesno(i915->gt.awake));
>   
>   	/* If the device is asleep, we have no requests outstanding */
>   	if (!READ_ONCE(i915->gt.awake))
> @@ -4177,7 +4177,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>   	 * it may impact the display and we are uncertain about the stability
>   	 * of the reset, so this could be applied to even earlier gen.
>   	 */
> -	intel_engines_sanitize(i915, false);
> +	intel_gt_sanitize(i915, false);
>   
>   	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>   	intel_runtime_pm_put(i915, wakeref);
> @@ -4235,27 +4235,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
>   	}
>   }
>   
> -static int __i915_gem_restart_engines(void *data)
> -{
> -	struct drm_i915_private *i915 = data;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	int err;
> -
> -	for_each_engine(engine, i915, id) {
> -		err = engine->init_hw(engine);
> -		if (err) {
> -			DRM_ERROR("Failed to restart %s (%d)\n",
> -				  engine->name, err);
> -			return err;
> -		}
> -	}
> -
> -	intel_engines_set_scheduler_caps(i915);
> -
> -	return 0;
> -}
> -
>   int i915_gem_init_hw(struct drm_i915_private *dev_priv)
>   {
>   	int ret;
> @@ -4314,12 +4293,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
>   	intel_mocs_init_l3cc_table(dev_priv);
>   
>   	/* Only when the HW is re-initialised, can we replay the requests */
> -	ret = __i915_gem_restart_engines(dev_priv);
> +	ret = intel_engines_resume(dev_priv);
>   	if (ret)
>   		goto cleanup_uc;
>   
>   	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
>   
> +	intel_engines_set_scheduler_caps(dev_priv);
>   	return 0;
>   
>   cleanup_uc:
> @@ -4625,6 +4605,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   err_init_hw:
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> +	i915_gem_set_wedged(dev_priv);
>   	i915_gem_suspend(dev_priv);
>   	i915_gem_suspend_late(dev_priv);
>   
> @@ -4686,6 +4667,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   
>   void i915_gem_fini(struct drm_i915_private *dev_priv)
>   {
> +	GEM_BUG_ON(dev_priv->gt.awake);
> +
>   	i915_gem_suspend_late(dev_priv);
>   	intel_disable_gt_powersave(dev_priv);
>   
> @@ -4780,6 +4763,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>   {
>   	int err;
>   
> +	intel_gt_pm_init(dev_priv);
> +
>   	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
>   	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 9074eb1e843f..67f8a4a807a0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -75,9 +75,6 @@ struct drm_i915_private;
>   
>   #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
>   
> -void i915_gem_park(struct drm_i915_private *i915);
> -void i915_gem_unpark(struct drm_i915_private *i915);
> -
>   static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
>   {
>   	if (!atomic_fetch_inc(&t->count))
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3eb1a664b5fa..76ed74e75d82 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -824,26 +824,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   }
>   
> -static struct i915_request *
> -last_request_on_engine(struct i915_timeline *timeline,
> -		       struct intel_engine_cs *engine)
> -{
> -	struct i915_request *rq;
> -
> -	GEM_BUG_ON(timeline == &engine->timeline);
> -
> -	rq = i915_active_request_raw(&timeline->last_request,
> -				     &engine->i915->drm.struct_mutex);
> -	if (rq && rq->engine->mask & engine->mask) {
> -		GEM_TRACE("last request on engine %s: %llx:%llu\n",
> -			  engine->name, rq->fence.context, rq->fence.seqno);
> -		GEM_BUG_ON(rq->timeline != timeline);
> -		return rq;
> -	}
> -
> -	return NULL;
> -}
> -
>   struct context_barrier_task {
>   	struct i915_active base;
>   	void (*task)(void *data);
> @@ -871,7 +851,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	struct drm_i915_private *i915 = ctx->i915;
>   	struct context_barrier_task *cb;
>   	struct intel_context *ce, *next;
> -	intel_wakeref_t wakeref;
>   	int err = 0;
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -884,7 +863,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	i915_active_init(i915, &cb->base, cb_retire);
>   	i915_active_acquire(&cb->base);
>   
> -	wakeref = intel_runtime_pm_get(i915);
>   	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
>   		struct intel_engine_cs *engine = ce->engine;
>   		struct i915_request *rq;
> @@ -914,7 +892,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   		if (err)
>   			break;
>   	}
> -	intel_runtime_pm_put(i915, wakeref);
>   
>   	cb->task = err ? NULL : task; /* caller needs to unwind instead */
>   	cb->data = data;
> @@ -924,54 +901,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	return err;
>   }
>   
> -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
> -				      intel_engine_mask_t mask)
> -{
> -	struct intel_engine_cs *engine;
> -
> -	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
> -
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(!i915->kernel_context);
> -
> -	/* Inoperable, so presume the GPU is safely pointing into the void! */
> -	if (i915_terminally_wedged(i915))
> -		return 0;
> -
> -	for_each_engine_masked(engine, i915, mask, mask) {
> -		struct intel_ring *ring;
> -		struct i915_request *rq;
> -
> -		rq = i915_request_create(engine->kernel_context);
> -		if (IS_ERR(rq))
> -			return PTR_ERR(rq);
> -
> -		/* Queue this switch after all other activity */
> -		list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
> -			struct i915_request *prev;
> -
> -			prev = last_request_on_engine(ring->timeline, engine);
> -			if (!prev)
> -				continue;
> -
> -			if (prev->gem_context == i915->kernel_context)
> -				continue;
> -
> -			GEM_TRACE("add barrier on %s for %llx:%lld\n",
> -				  engine->name,
> -				  prev->fence.context,
> -				  prev->fence.seqno);
> -			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> -							 &prev->submit,
> -							 I915_FENCE_GFP);
> -		}
> -
> -		i915_request_add(rq);
> -	}
> -
> -	return 0;
> -}
> -
>   static int get_ppgtt(struct drm_i915_file_private *file_priv,
>   		     struct i915_gem_context *ctx,
>   		     struct drm_i915_gem_context_param *args)
> @@ -1169,9 +1098,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
>   static int
>   gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   {
> -	struct drm_i915_private *i915 = ce->engine->i915;
>   	struct i915_request *rq;
> -	intel_wakeref_t wakeref;
>   	int ret;
>   
>   	lockdep_assert_held(&ce->pin_mutex);
> @@ -1185,14 +1112,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   	if (!intel_context_is_pinned(ce))
>   		return 0;
>   
> -	/* Submitting requests etc needs the hw awake. */
> -	wakeref = intel_runtime_pm_get(i915);
> -
>   	rq = i915_request_create(ce->engine->kernel_context);
> -	if (IS_ERR(rq)) {
> -		ret = PTR_ERR(rq);
> -		goto out_put;
> -	}
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
>   
>   	/* Queue this switch after all other activity by this context. */
>   	ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
> @@ -1216,9 +1138,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   
>   out_add:
>   	i915_request_add(rq);
> -out_put:
> -	intel_runtime_pm_put(i915, wakeref);
> -
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index cec278ab04e2..5a8e080499fb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -141,10 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915,
>   			  struct drm_file *file);
>   void i915_gem_context_close(struct drm_file *file);
>   
> -int i915_switch_context(struct i915_request *rq);
> -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
> -				      intel_engine_mask_t engine_mask);
> -
>   void i915_gem_context_release(struct kref *ctx_ref);
>   struct i915_gem_context *
>   i915_gem_context_create_gvt(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 060f5903544a..0bdb3e072ba5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -36,15 +36,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
>   	bool fail_if_busy:1;
>   } igt_evict_ctl;)
>   
> -static bool ggtt_is_idle(struct drm_i915_private *i915)
> -{
> -	return !i915->gt.active_requests;
> -}
> -
>   static int ggtt_flush(struct drm_i915_private *i915)
>   {
> -	int err;
> -
>   	/*
>   	 * Not everything in the GGTT is tracked via vma (otherwise we
>   	 * could evict as required with minimal stalling) so we are forced
> @@ -52,19 +45,10 @@ static int ggtt_flush(struct drm_i915_private *i915)
>   	 * the hopes that we can then remove contexts and the like only
>   	 * bound by their active reference.
>   	 */
> -	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
> -	if (err)
> -		return err;
> -
> -	err = i915_gem_wait_for_idle(i915,
> -				     I915_WAIT_INTERRUPTIBLE |
> -				     I915_WAIT_LOCKED,
> -				     MAX_SCHEDULE_TIMEOUT);
> -	if (err)
> -		return err;
> -
> -	GEM_BUG_ON(!ggtt_is_idle(i915));
> -	return 0;
> +	return i915_gem_wait_for_idle(i915,
> +				      I915_WAIT_INTERRUPTIBLE |
> +				      I915_WAIT_LOCKED,
> +				      MAX_SCHEDULE_TIMEOUT);
>   }
>   
>   static bool
> @@ -222,24 +206,17 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	 * us a termination condition, when the last retired context is
>   	 * the kernel's there is no more we can evict.
>   	 */
> -	if (!ggtt_is_idle(dev_priv)) {
> -		if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
> -			return -EBUSY;
> +	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
> +		return -EBUSY;
>   
> -		ret = ggtt_flush(dev_priv);
> -		if (ret)
> -			return ret;
> +	ret = ggtt_flush(dev_priv);
> +	if (ret)
> +		return ret;
>   
> -		cond_resched();
> -		goto search_again;
> -	}
> +	cond_resched();
>   
> -	/*
> -	 * If we still have pending pageflip completions, drop
> -	 * back to userspace to give our workqueues time to
> -	 * acquire our locks and unpin the old scanouts.
> -	 */
> -	return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC;
> +	flags |= PIN_NONBLOCK;
> +	goto search_again;
>   
>   found:
>   	/* drm_mm doesn't allow any other other operations while
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> index 9fb0e8d567a2..3554d55dae35 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> @@ -4,136 +4,63 @@
>    * Copyright © 2019 Intel Corporation
>    */
>   
> +#include "gt/intel_gt_pm.h"
> +
>   #include "i915_drv.h"
>   #include "i915_gem_pm.h"
>   #include "i915_globals.h"
> -#include "intel_pm.h"
>   
> -static void __i915_gem_park(struct drm_i915_private *i915)
> +static void i915_gem_park(struct drm_i915_private *i915)
>   {
> -	intel_wakeref_t wakeref;
> -
> -	GEM_TRACE("\n");
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(i915->gt.active_requests);
> -	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
> -
> -	if (!i915->gt.awake)
> -		return;
> -
> -	/*
> -	 * Be paranoid and flush a concurrent interrupt to make sure
> -	 * we don't reactivate any irq tasklets after parking.
> -	 *
> -	 * FIXME: Note that even though we have waited for execlists to be idle,
> -	 * there may still be an in-flight interrupt even though the CSB
> -	 * is now empty. synchronize_irq() makes sure that a residual interrupt
> -	 * is completed before we continue, but it doesn't prevent the HW from
> -	 * raising a spurious interrupt later. To complete the shield we should
> -	 * coordinate disabling the CS irq with flushing the interrupts.
> -	 */
> -	synchronize_irq(i915->drm.irq);
> -
> -	intel_engines_park(i915);
> -	i915_timelines_park(i915);
> -
> -	i915_pmu_gt_parked(i915);
> -	i915_vma_parked(i915);
> -
> -	wakeref = fetch_and_zero(&i915->gt.awake);
> -	GEM_BUG_ON(!wakeref);
> -
> -	if (INTEL_GEN(i915) >= 6)
> -		gen6_rps_idle(i915);
> -
> -	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
> -
> -	i915_globals_park();
> -}
> -
> -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
> -					  unsigned long mask)
> -{
> -	bool result = true;
> -
> -	/*
> -	 * Even if we fail to switch, give whatever is running a small chance
> -	 * to save itself before we report the failure. Yes, this may be a
> -	 * false positive due to e.g. ENOMEM, caveat emptor!
> -	 */
> -	if (i915_gem_switch_to_kernel_context(i915, mask))
> -		result = false;
>   
> -	if (i915_gem_wait_for_idle(i915,
> -				   I915_WAIT_LOCKED |
> -				   I915_WAIT_FOR_IDLE_BOOST,
> -				   I915_GEM_IDLE_TIMEOUT))
> -		result = false;
> +	for_each_engine(engine, i915, id) {
> +		/*
> +		 * We are committed now to parking the engines, make sure there
> +		 * will be no more interrupts arriving later and the engines
> +		 * are truly idle.
> +		 */
> +		if (wait_for(intel_engine_is_idle(engine), 10)) {
> +			struct drm_printer p = drm_debug_printer(__func__);
>   
> -	if (!result) {
> -		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
>   			dev_err(i915->drm.dev,
> -				"Failed to idle engines, declaring wedged!\n");
> -			GEM_TRACE_DUMP();
> +				"%s is not idle before parking\n",
> +				engine->name);
> +			intel_engine_dump(engine, &p, NULL);
>   		}
> +		tasklet_kill(&engine->execlists.tasklet);
>   
> -		/* Forcibly cancel outstanding work and leave the gpu quiet. */
> -		i915_gem_set_wedged(i915);
> +		i915_gem_batch_pool_fini(&engine->batch_pool);
>   	}
>   
> -	i915_retire_requests(i915); /* ensure we flush after wedging */
> -	return result;
> +	i915_timelines_park(i915);
> +	i915_vma_parked(i915);
> +
> +	i915_globals_park();
>   }
>   
>   static void idle_work_handler(struct work_struct *work)
>   {
>   	struct drm_i915_private *i915 =
>   		container_of(work, typeof(*i915), gem.idle_work.work);
> -	bool rearm_hangcheck;
> -
> -	if (!READ_ONCE(i915->gt.awake))
> -		return;
> -
> -	if (READ_ONCE(i915->gt.active_requests))
> -		return;
> -
> -	rearm_hangcheck =
> -		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>   
>   	if (!mutex_trylock(&i915->drm.struct_mutex)) {

Should struct_mutex be taken by i915_gem_park, inside the wakeref lock? 
Or would that create a lock inversion somewhere else?
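
Something like this is what I am worried about (a hypothetical sketch, 
not code from this patch, reusing intel_wakeref_lock from the preceding 
patch):

  /* unpark, if it took struct_mutex: wakeref lock, then struct_mutex */
  intel_wakeref_lock(&i915->gt.wakeref);
  mutex_lock(&i915->drm.struct_mutex);

  /* idle worker above: struct_mutex (trylock), then wakeref lock */
  mutex_trylock(&i915->drm.struct_mutex);
  intel_wakeref_lock(&i915->gt.wakeref);

Opposite orders on the two paths would be a classic ABBA, which I guess 
is why the trylock has to come first.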

>   		/* Currently busy, come back later */
>   		mod_delayed_work(i915->wq,
>   				 &i915->gem.idle_work,
>   				 msecs_to_jiffies(50));
> -		goto out_rearm;
> +		return;
>   	}
>   
> -	/*
> -	 * Flush out the last user context, leaving only the pinned
> -	 * kernel context resident. Should anything unfortunate happen
> -	 * while we are idle (such as the GPU being power cycled), no users
> -	 * will be harmed.
> -	 */
> -	if (!work_pending(&i915->gem.idle_work.work) &&
> -	    !i915->gt.active_requests) {
> -		++i915->gt.active_requests; /* don't requeue idle */
> -
> -		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
> -
> -		if (!--i915->gt.active_requests) {
> -			__i915_gem_park(i915);
> -			rearm_hangcheck = false;
> -		}
> -	}
> +	intel_wakeref_lock(&i915->gt.wakeref);
> +	if (!intel_wakeref_active(&i915->gt.wakeref))
> +		i915_gem_park(i915);
> +	intel_wakeref_unlock(&i915->gt.wakeref);
>   
>   	mutex_unlock(&i915->drm.struct_mutex);
> -
> -out_rearm:
> -	if (rearm_hangcheck) {
> -		GEM_BUG_ON(!i915->gt.awake);
> -		i915_queue_hangcheck(i915);
> -	}
>   }
>   
>   static void retire_work_handler(struct work_struct *work)
> @@ -147,97 +74,76 @@ static void retire_work_handler(struct work_struct *work)
>   		mutex_unlock(&i915->drm.struct_mutex);
>   	}
>   
> -	/*
> -	 * Keep the retire handler running until we are finally idle.
> -	 * We do not need to do this test under locking as in the worst-case
> -	 * we queue the retire worker once too often.
> -	 */
> -	if (READ_ONCE(i915->gt.awake))
> +	if (intel_wakeref_active(&i915->gt.wakeref))
>   		queue_delayed_work(i915->wq,
>   				   &i915->gem.retire_work,
>   				   round_jiffies_up_relative(HZ));
>   }
>   
> -void i915_gem_park(struct drm_i915_private *i915)
> +static int pm_notifier(struct notifier_block *nb,
> +		       unsigned long action,
> +		       void *data)
>   {
> -	GEM_TRACE("\n");
> +	struct drm_i915_private *i915 =
> +		container_of(nb, typeof(*i915), gem.pm_notifier);
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(i915->gt.active_requests);
> +	switch (action) {
> +	case INTEL_GT_UNPARK:
> +		i915_globals_unpark();
> +		queue_delayed_work(i915->wq,
> +				   &i915->gem.retire_work,
> +				   round_jiffies_up_relative(HZ));
> +		break;
>   
> -	if (!i915->gt.awake)
> -		return;
> +	case INTEL_GT_PARK:
> +		mod_delayed_work(i915->wq,
> +				 &i915->gem.idle_work,
> +				 msecs_to_jiffies(100));
> +		break;
> +	}
>   
> -	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
> -	mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100));
> +	return NOTIFY_OK;
>   }
>   
> -void i915_gem_unpark(struct drm_i915_private *i915)
> +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
>   {
> -	GEM_TRACE("\n");
> -
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(!i915->gt.active_requests);
> -	assert_rpm_wakelock_held(i915);
> -
> -	if (i915->gt.awake)
> -		return;
> -
> -	/*
> -	 * It seems that the DMC likes to transition between the DC states a lot
> -	 * when there are no connected displays (no active power domains) during
> -	 * command submission.
> -	 *
> -	 * This activity has negative impact on the performance of the chip with
> -	 * huge latencies observed in the interrupt handler and elsewhere.
> -	 *
> -	 * Work around it by grabbing a GT IRQ power domain whilst there is any
> -	 * GT activity, preventing any DC state transitions.
> -	 */
> -	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> -	GEM_BUG_ON(!i915->gt.awake);
> -
> -	i915_globals_unpark();
> -
> -	intel_enable_gt_powersave(i915);
> -	i915_update_gfx_val(i915);
> -	if (INTEL_GEN(i915) >= 6)
> -		gen6_rps_busy(i915);
> -	i915_pmu_gt_unparked(i915);
> -
> -	intel_engines_unpark(i915);
> +	bool result = true;
>   
> -	i915_queue_hangcheck(i915);
> +	do {
> +		if (i915_gem_wait_for_idle(i915,
> +					   I915_WAIT_LOCKED |
> +					   I915_WAIT_FOR_IDLE_BOOST,
> +					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> +			/* XXX hide warning from gem_eio */
> +			if (i915_modparams.reset) {
> +				dev_err(i915->drm.dev,
> +					"Failed to idle engines, declaring wedged!\n");
> +				GEM_TRACE_DUMP();
> +			}
> +
> +			/*
> +			 * Forcibly cancel outstanding work and leave
> +			 * the gpu quiet.
> +			 */
> +			i915_gem_set_wedged(i915);
> +			result = false;
> +		}
> +	} while (i915_retire_requests(i915) && result);
>   
> -	queue_delayed_work(i915->wq,
> -			   &i915->gem.retire_work,
> -			   round_jiffies_up_relative(HZ));
> +	GEM_BUG_ON(i915->gt.awake);
> +	return result;
>   }
>   
>   bool i915_gem_load_power_context(struct drm_i915_private *i915)
>   {
> -	/* Force loading the kernel context on all engines */
> -	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
> -		return false;
> -
> -	/*
> -	 * Immediately park the GPU so that we enable powersaving and
> -	 * treat it as idle. The next time we issue a request, we will
> -	 * unpark and start using the engine->pinned_default_state, otherwise
> -	 * it is in limbo and an early reset may fail.
> -	 */
> -	__i915_gem_park(i915);
> -
> -	return true;
> +	return switch_to_kernel_context_sync(i915);
>   }
>   
>   void i915_gem_suspend(struct drm_i915_private *i915)
>   {
> -	intel_wakeref_t wakeref;
> -
>   	GEM_TRACE("\n");
>   
> -	wakeref = intel_runtime_pm_get(i915);
> +	flush_workqueue(i915->wq);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> @@ -250,10 +156,16 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 * state. Fortunately, the kernel_context is disposable and we do
>   	 * not rely on its state.
>   	 */
> -	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
> +	switch_to_kernel_context_sync(i915);
>   
>   	mutex_unlock(&i915->drm.struct_mutex);
> -	i915_reset_flush(i915);
> +
> +	/*
> +	 * Assert that we successfully flushed all the work and
> +	 * reset the GPU back to its idle, low power state.
> +	 */
> +	GEM_BUG_ON(i915->gt.awake);
> +	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>   
>   	drain_delayed_work(&i915->gem.retire_work);
>   
> @@ -263,17 +175,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 */
>   	drain_delayed_work(&i915->gem.idle_work);
>   
> -	flush_workqueue(i915->wq);
> -
> -	/*
> -	 * Assert that we successfully flushed all the work and
> -	 * reset the GPU back to its idle, low power state.
> -	 */
> -	GEM_BUG_ON(i915->gt.awake);
> +	i915_gem_drain_freed_objects(i915);
>   
>   	intel_uc_suspend(i915);
> -
> -	intel_runtime_pm_put(i915, wakeref);
>   }
>   
>   void i915_gem_suspend_late(struct drm_i915_private *i915)
> @@ -362,4 +266,8 @@ void i915_gem_init__pm(struct drm_i915_private *i915)
>   {
>   	INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
>   	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
> +
> +	i915->gem.pm_notifier.notifier_call = pm_notifier;
> +	blocking_notifier_chain_register(&i915->gt.pm_notifications,
> +					 &i915->gem.pm_notifier);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h
> index 52f65e3f06b5..6f7d5d11ac3b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.h
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.h
> @@ -17,9 +17,6 @@ void i915_gem_init__pm(struct drm_i915_private *i915);
>   bool i915_gem_load_power_context(struct drm_i915_private *i915);
>   void i915_gem_resume(struct drm_i915_private *i915);
>   
> -void i915_gem_unpark(struct drm_i915_private *i915);
> -void i915_gem_park(struct drm_i915_private *i915);
> -
>   void i915_gem_idle_work_handler(struct work_struct *work);
>   
>   void i915_gem_suspend(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index b419d0f59275..2ecd0c6a1c94 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -179,8 +179,6 @@ struct i915_gpu_state {
>   	struct scatterlist *sgl, *fit;
>   };
>   
> -struct i915_gpu_restart;
> -
>   struct i915_gpu_error {
>   	/* For hangcheck timer */
>   #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
> @@ -241,8 +239,6 @@ struct i915_gpu_error {
>   	wait_queue_head_t reset_queue;
>   
>   	struct srcu_struct reset_backoff_srcu;
> -
> -	struct i915_gpu_restart *restart;
>   };
>   
>   struct drm_i915_error_state_buf {
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 672c9ea6c24f..d116b5e69826 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -431,6 +431,8 @@ void __i915_request_submit(struct i915_request *request)
>   	/* Transfer from per-context onto the global per-engine timeline */
>   	move_to_timeline(request, &engine->timeline);
>   
> +	engine->serial++;
> +
>   	trace_i915_request_execute(request);
>   }
>   
> @@ -1146,7 +1148,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
>   	list_add_tail(&rq->ring_link, &ring->request_list);
>   	if (list_is_first(&rq->ring_link, &ring->request_list))
>   		list_add(&ring->active_link, &rq->i915->gt.active_rings);
> -	rq->i915->gt.active_engines |= rq->engine->mask;
>   	rq->emitted_jiffies = jiffies;
>   
>   	/*
> @@ -1418,21 +1419,20 @@ long i915_request_wait(struct i915_request *rq,
>   	return timeout;
>   }
>   
> -void i915_retire_requests(struct drm_i915_private *i915)
> +bool i915_retire_requests(struct drm_i915_private *i915)
>   {
>   	struct intel_ring *ring, *tmp;
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	if (!i915->gt.active_requests)
> -		return;

You don't want to replace this with a wakeref_active check?

> -
>   	list_for_each_entry_safe(ring, tmp,
>   				 &i915->gt.active_rings, active_link) {
>   		intel_ring_get(ring); /* last rq holds reference! */
>   		ring_retire_requests(ring);
>   		intel_ring_put(ring);
>   	}
> +
> +	return !list_empty(&i915->gt.active_rings);
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 36f13b74ec58..1eee7416af31 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -425,6 +425,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
>   	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
>   }
>   
> -void i915_retire_requests(struct drm_i915_private *i915);
> +bool i915_retire_requests(struct drm_i915_private *i915);
>   
>   #endif /* I915_REQUEST_H */
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 13f823ff8083..fd9d3b0d9f47 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -466,26 +466,22 @@ void intel_uc_reset_prepare(struct drm_i915_private *i915)
>   	intel_uc_sanitize(i915);
>   }
>   
> -int intel_uc_suspend(struct drm_i915_private *i915)
> +void intel_uc_suspend(struct drm_i915_private *i915)
>   {
>   	struct intel_guc *guc = &i915->guc;
> +	intel_wakeref_t wakeref;
>   	int err;
>   
> -	if (!USES_GUC(i915))
> -		return 0;
> -
>   	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> -		return 0;
> -
> -	err = intel_guc_suspend(guc);
> -	if (err) {
> -		DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
> -		return err;
> -	}
> +		return;
>   
> -	guc_disable_communication(guc);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		err = intel_guc_suspend(guc);
> +		if (err)
> +			DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
>   
> -	return 0;
> +		guc_disable_communication(guc);
> +	}
>   }
>   
>   int intel_uc_resume(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index c14729786652..c92436b1f1c5 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -39,7 +39,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>   int intel_uc_init(struct drm_i915_private *dev_priv);
>   void intel_uc_fini(struct drm_i915_private *dev_priv);
>   void intel_uc_reset_prepare(struct drm_i915_private *i915);
> -int intel_uc_suspend(struct drm_i915_private *dev_priv);
> +void intel_uc_suspend(struct drm_i915_private *i915);
>   int intel_uc_resume(struct drm_i915_private *dev_priv);
>   
>   static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 6fd70d326468..0342de369d3e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -16,26 +16,18 @@ static int switch_to_context(struct drm_i915_private *i915,
>   {
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -	int err = 0;
> -
> -	wakeref = intel_runtime_pm_get(i915);
>   
>   	for_each_engine(engine, i915, id) {
>   		struct i915_request *rq;
>   
>   		rq = i915_request_alloc(engine, ctx);
> -		if (IS_ERR(rq)) {
> -			err = PTR_ERR(rq);
> -			break;
> -		}
> +		if (IS_ERR(rq))
> +			return PTR_ERR(rq);
>   
>   		i915_request_add(rq);
>   	}
>   
> -	intel_runtime_pm_put(i915, wakeref);
> -
> -	return err;
> +	return 0;
>   }
>   
>   static void trash_stolen(struct drm_i915_private *i915)
> @@ -120,7 +112,7 @@ static void pm_resume(struct drm_i915_private *i915)
>   	 * that runtime-pm just works.
>   	 */
>   	with_intel_runtime_pm(i915, wakeref) {
> -		intel_engines_sanitize(i915, false);
> +		intel_gt_sanitize(i915, false);
>   		i915_gem_sanitize(i915);
>   		i915_gem_resume(i915);
>   	}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 9d646fa1b74e..71d896bbade2 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1608,113 +1608,6 @@ __engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
>   	return "none";
>   }
>   
> -static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
> -					  struct i915_gem_context *ctx,
> -					  intel_engine_mask_t engines)
> -{
> -	struct intel_engine_cs *engine;
> -	intel_engine_mask_t tmp;
> -	int pass;
> -
> -	GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
> -	for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */
> -		bool from_idle = pass & 1;
> -		int err;
> -
> -		if (!from_idle) {
> -			for_each_engine_masked(engine, i915, engines, tmp) {
> -				struct i915_request *rq;
> -
> -				rq = i915_request_alloc(engine, ctx);
> -				if (IS_ERR(rq))
> -					return PTR_ERR(rq);
> -
> -				i915_request_add(rq);
> -			}
> -		}
> -
> -		err = i915_gem_switch_to_kernel_context(i915,
> -							i915->gt.active_engines);
> -		if (err)
> -			return err;
> -
> -		if (!from_idle) {
> -			err = i915_gem_wait_for_idle(i915,
> -						     I915_WAIT_LOCKED,
> -						     MAX_SCHEDULE_TIMEOUT);
> -			if (err)
> -				return err;
> -		}
> -
> -		if (i915->gt.active_requests) {
> -			pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n",
> -			       i915->gt.active_requests,
> -			       pass, from_idle ? "idle" : "busy",
> -			       __engine_name(i915, engines),
> -			       is_power_of_2(engines) ? "" : "s");
> -			return -EINVAL;
> -		}
> -
> -		/* XXX Bonus points for proving we are the kernel context! */
> -
> -		mutex_unlock(&i915->drm.struct_mutex);
> -		drain_delayed_work(&i915->gem.idle_work);
> -		mutex_lock(&i915->drm.struct_mutex);
> -	}
> -
> -	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> -		return -EIO;
> -
> -	return 0;
> -}
> -
> -static int igt_switch_to_kernel_context(void *arg)
> -{
> -	struct drm_i915_private *i915 = arg;
> -	struct intel_engine_cs *engine;
> -	struct i915_gem_context *ctx;
> -	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -	int err;
> -
> -	/*
> -	 * A core premise of switching to the kernel context is that
> -	 * if an engine is already idling in the kernel context, we
> -	 * do not emit another request and wake it up. The other being
> -	 * that we do indeed end up idling in the kernel context.
> -	 */
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	ctx = kernel_context(i915);
> -	if (IS_ERR(ctx)) {
> -		mutex_unlock(&i915->drm.struct_mutex);
> -		return PTR_ERR(ctx);
> -	}
> -
> -	/* First check idling each individual engine */
> -	for_each_engine(engine, i915, id) {
> -		err = __igt_switch_to_kernel_context(i915, ctx, BIT(id));
> -		if (err)
> -			goto out_unlock;
> -	}
> -
> -	/* Now en masse */
> -	err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES);
> -	if (err)
> -		goto out_unlock;
> -
> -out_unlock:
> -	GEM_TRACE_DUMP_ON(err);
> -
> -	intel_runtime_pm_put(i915, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
> -	kernel_context_close(ctx);
> -	return err;
> -}
> -
>   static void mock_barrier_task(void *data)
>   {
>   	unsigned int *counter = data;
> @@ -1729,7 +1622,6 @@ static int mock_context_barrier(void *arg)
>   	struct drm_i915_private *i915 = arg;
>   	struct i915_gem_context *ctx;
>   	struct i915_request *rq;
> -	intel_wakeref_t wakeref;
>   	unsigned int counter;
>   	int err;
>   
> @@ -1772,9 +1664,7 @@ static int mock_context_barrier(void *arg)
>   		goto out;
>   	}
>   
> -	rq = ERR_PTR(-ENODEV);
> -	with_intel_runtime_pm(i915, wakeref)
> -		rq = i915_request_alloc(i915->engine[RCS0], ctx);
> +	rq = i915_request_alloc(i915->engine[RCS0], ctx);
>   	if (IS_ERR(rq)) {
>   		pr_err("Request allocation failed!\n");
>   		goto out;
> @@ -1824,7 +1714,6 @@ static int mock_context_barrier(void *arg)
>   int i915_gem_context_mock_selftests(void)
>   {
>   	static const struct i915_subtest tests[] = {
> -		SUBTEST(igt_switch_to_kernel_context),
>   		SUBTEST(mock_context_barrier),
>   	};
>   	struct drm_i915_private *i915;
> @@ -1843,7 +1732,6 @@ int i915_gem_context_mock_selftests(void)
>   int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>   {
>   	static const struct i915_subtest tests[] = {
> -		SUBTEST(igt_switch_to_kernel_context),
>   		SUBTEST(live_nop_switch),
>   		SUBTEST(igt_ctx_exec),
>   		SUBTEST(igt_ctx_readonly),
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 12203d665a4e..088b2aa05dcd 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -24,6 +24,7 @@
>   
>   #include "../i915_selftest.h"
>   
> +#include "igt_flush_test.h"
>   #include "mock_gem_device.h"
>   #include "huge_gem_object.h"
>   
> @@ -505,19 +506,23 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>   {
>   	i915_gem_shrinker_unregister(i915);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -	if (!i915->gt.active_requests++) {
> -		intel_wakeref_t wakeref;
> -
> -		with_intel_runtime_pm(i915, wakeref)
> -			i915_gem_unpark(i915);
> -	}
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	intel_gt_pm_get(i915);
>   
>   	cancel_delayed_work_sync(&i915->gem.retire_work);
>   	cancel_delayed_work_sync(&i915->gem.idle_work);
>   }
>   
> +static void restore_retire_worker(struct drm_i915_private *i915)
> +{
> +	intel_gt_pm_put(i915);
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	igt_flush_test(i915, I915_WAIT_LOCKED);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	i915_gem_shrinker_register(i915);
> +}
> +
>   static int igt_mmap_offset_exhaustion(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -615,13 +620,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
>   out:
>   	drm_mm_remove_node(&resv);
>   out_park:
> -	mutex_lock(&i915->drm.struct_mutex);
> -	if (--i915->gt.active_requests)
> -		queue_delayed_work(i915->wq, &i915->gem.retire_work, 0);
> -	else
> -		queue_delayed_work(i915->wq, &i915->gem.idle_work, 0);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -	i915_gem_shrinker_register(i915);
> +	restore_retire_worker(i915);
>   	return err;
>   err_obj:
>   	i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> index 94aee4071a66..e42f3c58536a 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> @@ -11,23 +11,29 @@
>   
>   int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
>   {
> +	int ret = i915_terminally_wedged(i915) ? -EIO : 0;
> +	int repeat = !!(flags & I915_WAIT_LOCKED);
> +
>   	cond_resched();
>   
> -	if (flags & I915_WAIT_LOCKED &&
> -	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
> -		pr_err("Failed to switch back to kernel context; declaring wedged\n");
> -		i915_gem_set_wedged(i915);
> -	}
> +	do {
> +		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
> +			pr_err("%pS timed out, cancelling all further testing.\n",
> +			       __builtin_return_address(0));
>   
> -	if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
> -		pr_err("%pS timed out, cancelling all further testing.\n",
> -		       __builtin_return_address(0));
> +			GEM_TRACE("%pS timed out.\n",
> +				  __builtin_return_address(0));
> +			GEM_TRACE_DUMP();
>   
> -		GEM_TRACE("%pS timed out.\n", __builtin_return_address(0));
> -		GEM_TRACE_DUMP();
> +			i915_gem_set_wedged(i915);
> +			repeat = 0;
> +			ret = -EIO;
> +		}
>   
> -		i915_gem_set_wedged(i915);
> -	}
> +		/* Ensure we also flush after wedging. */
> +		if (flags & I915_WAIT_LOCKED)
> +			i915_retire_requests(i915);
> +	} while (repeat--);
>   
> -	return i915_terminally_wedged(i915);
> +	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index fb677b4019a0..c072424c6b7c 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -41,11 +41,10 @@ void mock_device_flush(struct drm_i915_private *i915)
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	for_each_engine(engine, i915, id)
> -		mock_engine_flush(engine);
> -
> -	i915_retire_requests(i915);
> -	GEM_BUG_ON(i915->gt.active_requests);
> +	do {
> +		for_each_engine(engine, i915, id)
> +			mock_engine_flush(engine);
> +	} while (i915_retire_requests(i915));
>   }
>   
>   static void mock_device_release(struct drm_device *dev)
> @@ -110,10 +109,6 @@ static void mock_retire_work_handler(struct work_struct *work)
>   
>   static void mock_idle_work_handler(struct work_struct *work)
>   {
> -	struct drm_i915_private *i915 =
> -		container_of(work, typeof(*i915), gem.idle_work.work);
> -
> -	i915->gt.active_engines = 0;
>   }
>   
>   static int pm_domain_resume(struct device *dev)
> @@ -185,6 +180,8 @@ struct drm_i915_private *mock_gem_device(void)
>   
>   	mock_uncore_init(&i915->uncore);
>   	i915_gem_init__mm(i915);
> +	intel_gt_pm_init(i915);
> +	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
>   
>   	init_waitqueue_head(&i915->gpu_error.wait_queue);
>   	init_waitqueue_head(&i915->gpu_error.reset_queue);
> 

As I said before, the concept is very elegant and I like it.

But it is a monster refactor, and as much as I did cross-reference the 
diff against the patched tree to get the full picture, I have to say my 
review is more high level, trusting the CI to catch any details. :I

My main concern is lock nesting, especially the nested annotation in 
the preceding patch. Does lockdep catch anything if you don't have that 
annotation?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy
  2019-04-18 12:42   ` Tvrtko Ursulin
@ 2019-04-18 13:07     ` Chris Wilson
  2019-04-18 13:22       ` Chris Wilson
  0 siblings, 1 reply; 68+ messages in thread
From: Chris Wilson @ 2019-04-18 13:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-18 13:42:59)
> 
> On 17/04/2019 08:56, Chris Wilson wrote:
> > +static bool switch_to_kernel_context(struct intel_engine_cs *engine)
> > +{
> > +     struct i915_request *rq;
> > +
> > +     /* Already inside the kernel context, safe to power down. */
> > +     if (engine->wakeref_serial == engine->serial)
> > +             return true;
> > +
> > +     /* GPU is pointing to the void, as good as in the kernel context. */
> > +     if (i915_reset_failed(engine->i915))
> > +             return true;
> > +
> > +     /*
> > +      * Note, we do this without taking the timeline->mutex. We cannot
> > +      * as we may be called while retiring the kernel context and so
> > +      * already underneath the timeline->mutex. Instead we rely on the
> > +      * exclusive property of the intel_engine_park that prevents anyone
> > +      * else from creating a request on this engine. This also requires
> > +      * that the ring is empty and we avoid any waits while constructing
> > +      * the context, as they assume protection by the timeline->mutex.
> > +      * This should hold true as we can only park the engine after
> > +      * retiring the last request, thus all rings should be empty and
> > +      * all timelines idle.
> > +      */
> > +     rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
> > +     if (IS_ERR(rq))
> > +             /* Context switch failed, hope for the best! Maybe reset? */
> > +             return true;
> > +
> > +     /* Check again on the next retirement. */
> > +     engine->wakeref_serial = engine->serial + 1;
> 
> Is engine->serial guaranteed to be stable at this point? I guess so 
> since there can only be one park at a time.

Yes, we're inside the engine park routine, so we are serialised against
all other users of the engine.

> > +     __i915_request_commit(rq);
> > +
> > +     return false;
> > +}
> > +
> > +static int intel_engine_park(struct intel_wakeref *wf)
> > +{
> > +     struct intel_engine_cs *engine =
> > +             container_of(wf, typeof(*engine), wakeref);
> > +
> > +     /*
> > +      * If one and only one request is completed between pm events,
> > +      * we know that we are inside the kernel context and it is
> > +      * safe to power down. (We are paranoid in case that runtime
> > +      * suspend causes corruption to the active context image, and
> > +      * want to avoid that impacting userspace.)
> > +      */
> > +     if (!switch_to_kernel_context(engine))
> > +             return -EBUSY;
> 
> But it is ignored by intel_engine_pm_put. Should it be a WARN_ON or 
> something?

The intel_wakeref takes action and defers the put/parking. That's all we
need here, as the GEM layer stays awake with its background retire
worker still poking occasionally.
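
Roughly, the put side does something like this (an illustrative sketch,
not the actual code; ops->park stands in for whatever park callback is
registered):

	if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex)) {
		if (wf->ops->park(wf))	/* e.g. -EBUSY from intel_engine_park */
			atomic_inc(&wf->count);	/* stay awake, retry on the next put */
		mutex_unlock(&wf->mutex);
	}

A failed park simply leaves the reference held until somebody tries
the put again.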

> > @@ -718,6 +721,7 @@ static void reset_prepare(struct drm_i915_private *i915)
> >       struct intel_engine_cs *engine;
> >       enum intel_engine_id id;
> >   
> > +     intel_gt_pm_get(i915);
> 
> Is it not in the spirit of the patch to let the engines wake up the gt?

You could; this was just because I liked the look of it. In an operation
affecting all engines, it felt like the right thing to do.

> >       for_each_engine(engine, i915, id)
> >               reset_prepare_engine(engine);
> >   
> > @@ -755,48 +759,10 @@ static int gt_reset(struct drm_i915_private *i915,
> >   static void reset_finish_engine(struct intel_engine_cs *engine)
> >   {
> >       engine->reset.finish(engine);
> > +     intel_engine_pm_put(engine);
> >       intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
> >   }
> >   
> > -struct i915_gpu_restart {
> > -     struct work_struct work;
> > -     struct drm_i915_private *i915;
> > -};
> > -
> > -static void restart_work(struct work_struct *work)
> 
> Oh wow, I had not seen this part of the code until now. Perhaps ask a
> second pair of eyes to check on it?

This is the best part! Resolved a very, very annoying thorn with the
reset requiring a worker.

> > @@ -1137,6 +1076,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
> >       GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
> >       GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
> >   
> > +     if (!intel_wakeref_active(&engine->wakeref))
> > +             return 0;
> 
> I guess there can't be any races here, since a stuck engine can't be
> parked. Do we have any tests which trigger this without a guilty
> request? I seem to remember that isn't possible, so probably not.

We do, we have the reset idle engine selftests. Not that they prove very
much, just that we don't die.

> > -static int init_render_ring(struct intel_engine_cs *engine)
> > +static int rcs_resume(struct intel_engine_cs *engine)
> >   {
> >       struct drm_i915_private *dev_priv = engine->i915;
> > -     int ret = init_ring_common(engine);
> > -     if (ret)
> > -             return ret;
> >   
> >       /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
> >       if (IS_GEN_RANGE(dev_priv, 4, 6))
> > @@ -875,7 +875,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
> >       if (INTEL_GEN(dev_priv) >= 6)
> >               ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
> >   
> > -     return 0;
> > +     return xcs_resume(engine);
> 
> This inverts the order between the common and rcs init. One thing which
> jumps out is the RING_IMR write, which is now done after starting the engine.
> Can we lose an interrupt now?

That write shouldn't be there, we take care of that inside the restart.
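
(For illustration only, not the actual xcs_resume(), with ring->size as
a stand-in: the restart should in effect do

	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
	/* ...program RING_HEAD / RING_TAIL... */
	ENGINE_WRITE(engine, RING_CTL, RING_CTL_SIZE(ring->size) | RING_VALID);

so that IMR is already valid by the time the ring is started and can
raise an interrupt.)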

> >   static void idle_work_handler(struct work_struct *work)
> >   {
> >       struct drm_i915_private *i915 =
> >               container_of(work, typeof(*i915), gem.idle_work.work);
> > -     bool rearm_hangcheck;
> > -
> > -     if (!READ_ONCE(i915->gt.awake))
> > -             return;
> > -
> > -     if (READ_ONCE(i915->gt.active_requests))
> > -             return;
> > -
> > -     rearm_hangcheck =
> > -             cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
> >   
> >       if (!mutex_trylock(&i915->drm.struct_mutex)) {
> 
> Should struct_mutex be taken by i915_gem_park, inside the wakeref lock? 
> Or would that create a lock inversion somewhere else?

NO! Yes, that would create the most mighty of inversions! Never, ever,
ever, take struct_mutex inside another lock, for it's the outer lock for
the entire driver (give or take an insignificant amount).

About the only lock it is under is the mmap_sem, and look at the pain
that causes. :(
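
To spell out the inversion with a pair of hypothetical call chains
(illustrative only):

	CPU0					CPU1
	mutex_lock(&wf->mutex);			mutex_lock(&i915->drm.struct_mutex);
	mutex_lock(&i915->drm.struct_mutex);	mutex_lock(&wf->mutex);

Classic ABBA, and lockdep would scream as soon as it saw both orders.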

> > +bool i915_retire_requests(struct drm_i915_private *i915)
> >   {
> >       struct intel_ring *ring, *tmp;
> >   
> >       lockdep_assert_held(&i915->drm.struct_mutex);
> >   
> > -     if (!i915->gt.active_requests)
> > -             return;
> 
> You don't want to replace this with a wakeref_active check?

I didn't feel it was worth it in the short term. At the end of the day,
the main caller of i915_retire_requests() should be the retirement
worker, with perhaps a call from the shrinker (but hopefully not).

[snip]

> As said before, the concept is very elegant and I like it.
> 
> But it is a monster refactor, and as much as I did cross-reference the diff
> versus the patched tree to get the full picture, I have to say my review is
> more high level, trusting the CI to catch any details. :I
> 
> My main concern is lock nesting, especially the nested annotation in
> the preceding patch. Does lockdep catch anything if you don't have that
> annotation?

Yes. The shrinker calls intel_wakeref_put(), but we need to take more
locks inside intel_wakeref_get() (the pin_map, and more in unpark).
Hence they get caught in the same lock_map, and lockdep gets quite angry
even though they cannot overlap.
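
For reference, the annotation under discussion is just the standard
subclass trick, something along the lines of (illustrative, not the
exact hunk):

	mutex_lock_nested(&wf->mutex, SINGLE_DEPTH_NESTING);

which tells lockdep that this acquisition site never nests with the
other one, at the cost of lockdep no longer verifying that claim.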
-Chris

* Re: [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy
  2019-04-18 13:07     ` Chris Wilson
@ 2019-04-18 13:22       ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-18 13:22 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2019-04-18 14:07:49)
> Quoting Tvrtko Ursulin (2019-04-18 13:42:59)
> > 
> > On 17/04/2019 08:56, Chris Wilson wrote:
> > > -static int init_render_ring(struct intel_engine_cs *engine)
> > > +static int rcs_resume(struct intel_engine_cs *engine)
> > >   {
> > >       struct drm_i915_private *dev_priv = engine->i915;
> > > -     int ret = init_ring_common(engine);
> > > -     if (ret)
> > > -             return ret;
> > >   
> > >       /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
> > >       if (IS_GEN_RANGE(dev_priv, 4, 6))
> > > @@ -875,7 +875,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
> > >       if (INTEL_GEN(dev_priv) >= 6)
> > >               ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
> > >   
> > > -     return 0;
> > > +     return xcs_resume(engine);
> > 
> > This inverts the order between the common and rcs init. One thing which
> > jumps out is the RING_IMR write, which is now done after starting the engine.
> > Can we lose an interrupt now?
> 
> That write shouldn't be there, we take care of that inside the restart.

Even more to the point, they should have been set before we started the
engine!

Eeek, that looks to be really funky old behaviour.
-Chris

* Re: [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/
  2019-04-18 12:04   ` Joonas Lahtinen
@ 2019-04-23  8:57     ` Joonas Lahtinen
  2019-04-23  9:40       ` Jani Nikula
  0 siblings, 1 reply; 68+ messages in thread
From: Joonas Lahtinen @ 2019-04-23  8:57 UTC (permalink / raw)
  To: Chris Wilson, Jani Nikula, Rodrigo Vivi, intel-gfx

Quoting Joonas Lahtinen (2019-04-18 15:04:49)
> + Jani and Rodrigo to comment

No objection here and drm-intel-next was freshly tagged, so this is:

Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas

> 
> I'm definitely all for doing this, so it's only a matter of timing.
> 
> The question is: do we want to do it right now, after the last
> drm-intel-next was tagged, or do we want to wait a couple of release
> candidates?
> 
> I'm leaning towards doing this ASAP, as git cherry-pick should
> understand that they're just renames, so there should be no issue with
> doing the -fixes.
> 
> Regards, Joonas
> 
> Quoting Chris Wilson (2019-04-17 10:56:32)
> > Start partitioning off the code that talks to the hardware (GT) from the
> > uapi layers and move the device facing code under gt/
> > 
> > One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to
> > subdivide that header and body further (and split out the submission
> > code from the ringbuffer and logical context handling). This patch aims
> > to be simple motion so git can fixup inflight patches with little mess.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 | 46 ++++++++++++-------
> >  drivers/gpu/drm/i915/Makefile.header-test     |  6 +--
> >  drivers/gpu/drm/i915/gt/Makefile              |  2 +
> >  drivers/gpu/drm/i915/gt/Makefile.header-test  | 16 +++++++
> >  .../gpu/drm/i915/{ => gt}/intel_breadcrumbs.c |  0
> >  drivers/gpu/drm/i915/{ => gt}/intel_context.c |  3 +-
> >  drivers/gpu/drm/i915/{ => gt}/intel_context.h |  0
> >  .../drm/i915/{ => gt}/intel_context_types.h   |  0
> >  .../{intel_ringbuffer.h => gt/intel_engine.h} |  0
> >  .../gpu/drm/i915/{ => gt}/intel_engine_cs.c   |  8 ++--
> >  .../drm/i915/{ => gt}/intel_engine_types.h    |  5 +-
> >  .../drm/i915/{ => gt}/intel_gpu_commands.h    |  0
> >  .../gpu/drm/i915/{ => gt}/intel_hangcheck.c   |  4 +-
> >  drivers/gpu/drm/i915/{ => gt}/intel_lrc.c     |  5 +-
> >  drivers/gpu/drm/i915/{ => gt}/intel_lrc.h     |  4 +-
> >  drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h |  0
> >  drivers/gpu/drm/i915/{ => gt}/intel_mocs.c    |  4 +-
> >  drivers/gpu/drm/i915/{ => gt}/intel_mocs.h    |  4 +-
> >  .../i915/{i915_reset.c => gt/intel_reset.c}   |  2 +-
> >  .../i915/{i915_reset.h => gt/intel_reset.h}   |  2 +-
> >  .../gpu/drm/i915/{ => gt}/intel_ringbuffer.c  |  3 +-
> >  drivers/gpu/drm/i915/{ => gt}/intel_sseu.c    |  0
> >  drivers/gpu/drm/i915/{ => gt}/intel_sseu.h    |  0
> >  .../gpu/drm/i915/{ => gt}/intel_workarounds.c |  2 +-
> >  .../gpu/drm/i915/{ => gt}/intel_workarounds.h |  8 +++-
> >  .../i915/{ => gt}/intel_workarounds_types.h   |  0
> >  .../drm/i915/{selftests => gt}/mock_engine.c  | 10 ++--
> >  .../drm/i915/{selftests => gt}/mock_engine.h  |  2 +-
> >  .../selftest_engine_cs.c}                     |  0
> >  .../selftest_hangcheck.c}                     | 16 +++----
> >  .../intel_lrc.c => gt/selftest_lrc.c}         | 16 +++----
> >  .../selftest_workarounds.c}                   | 18 ++++----
> >  drivers/gpu/drm/i915/i915_cmd_parser.c        |  3 +-
> >  drivers/gpu/drm/i915/i915_debugfs.c           |  3 +-
> >  drivers/gpu/drm/i915/i915_drv.c               |  5 +-
> >  drivers/gpu/drm/i915/i915_drv.h               |  7 +--
> >  drivers/gpu/drm/i915/i915_gem.c               |  7 +--
> >  drivers/gpu/drm/i915/i915_gem_context.c       |  7 ++-
> >  drivers/gpu/drm/i915/i915_gem_context.h       |  3 +-
> >  drivers/gpu/drm/i915/i915_gem_context_types.h |  3 +-
> >  drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 -
> >  drivers/gpu/drm/i915/i915_gem_gtt.h           |  2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.h         |  3 +-
> >  drivers/gpu/drm/i915/i915_perf.c              |  3 +-
> >  drivers/gpu/drm/i915/i915_pmu.c               |  4 +-
> >  drivers/gpu/drm/i915/i915_request.c           |  1 -
> >  drivers/gpu/drm/i915/i915_scheduler_types.h   |  2 +-
> >  drivers/gpu/drm/i915/i915_trace.h             |  3 +-
> >  drivers/gpu/drm/i915/i915_vma.c               |  3 +-
> >  drivers/gpu/drm/i915/intel_device_info.h      |  6 ++-
> >  drivers/gpu/drm/i915/intel_display.c          |  1 -
> >  drivers/gpu/drm/i915/intel_guc_submission.c   |  3 +-
> >  drivers/gpu/drm/i915/intel_guc_submission.h   |  3 +-
> >  drivers/gpu/drm/i915/intel_uc.c               |  2 +-
> >  .../gpu/drm/i915/selftests/i915_gem_context.c |  5 +-
> >  drivers/gpu/drm/i915/selftests/igt_reset.c    |  3 +-
> >  drivers/gpu/drm/i915/selftests/igt_spinner.h  |  3 +-
> >  .../gpu/drm/i915/selftests/mock_gem_device.c  |  3 +-
> >  drivers/gpu/drm/i915/selftests/mock_request.c |  3 +-
> >  59 files changed, 166 insertions(+), 112 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/Makefile
> >  create mode 100644 drivers/gpu/drm/i915/gt/Makefile.header-test
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_breadcrumbs.c (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_context.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_context.h (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_context_types.h (100%)
> >  rename drivers/gpu/drm/i915/{intel_ringbuffer.h => gt/intel_engine.h} (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_engine_cs.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_engine_types.h (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_gpu_commands.h (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_hangcheck.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_lrc.h (98%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_lrc_reg.h (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_mocs.h (97%)
> >  rename drivers/gpu/drm/i915/{i915_reset.c => gt/intel_reset.c} (99%)
> >  rename drivers/gpu/drm/i915/{i915_reset.h => gt/intel_reset.h} (98%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_ringbuffer.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.c (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_sseu.h (100%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.c (99%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds.h (88%)
> >  rename drivers/gpu/drm/i915/{ => gt}/intel_workarounds_types.h (100%)
> >  rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.c (97%)
> >  rename drivers/gpu/drm/i915/{selftests => gt}/mock_engine.h (98%)
> >  rename drivers/gpu/drm/i915/{selftests/intel_engine_cs.c => gt/selftest_engine_cs.c} (100%)
> >  rename drivers/gpu/drm/i915/{selftests/intel_hangcheck.c => gt/selftest_hangcheck.c} (99%)
> >  rename drivers/gpu/drm/i915/{selftests/intel_lrc.c => gt/selftest_lrc.c} (99%)
> >  rename drivers/gpu/drm/i915/{selftests/intel_workarounds.c => gt/selftest_workarounds.c} (98%)
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 53ff209b91bb..40130cf5c003 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -35,32 +35,53 @@ subdir-ccflags-y += \
> >  # Extra header tests
> >  include $(src)/Makefile.header-test
> >  
> > +subdir-ccflags-y += -I$(src)
> > +
> >  # Please keep these build lists sorted!
> >  
> >  # core driver code
> >  i915-y += i915_drv.o \
> >           i915_irq.o \
> > -         i915_memcpy.o \
> > -         i915_mm.o \
> >           i915_params.o \
> >           i915_pci.o \
> > -         i915_reset.o \
> >           i915_suspend.o \
> > -         i915_sw_fence.o \
> > -         i915_syncmap.o \
> >           i915_sysfs.o \
> > -         i915_user_extensions.o \
> >           intel_csr.o \
> >           intel_device_info.o \
> >           intel_pm.o \
> >           intel_runtime_pm.o \
> > -         intel_workarounds.o
> > +         intel_uncore.o
> > +
> > +# core library code
> > +i915-y += \
> > +       i915_memcpy.o \
> > +       i915_mm.o \
> > +       i915_sw_fence.o \
> > +       i915_syncmap.o \
> > +       i915_user_extensions.o
> >  
> >  i915-$(CONFIG_COMPAT)   += i915_ioc32.o
> >  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
> >  i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
> >  
> > -# GEM code
> > +# "Graphics Technology" (aka we talk to the gpu)
> > +obj-y += gt/
> > +gt-y += \
> > +       gt/intel_breadcrumbs.o \
> > +       gt/intel_context.o \
> > +       gt/intel_engine_cs.o \
> > +       gt/intel_hangcheck.o \
> > +       gt/intel_lrc.o \
> > +       gt/intel_reset.o \
> > +       gt/intel_ringbuffer.o \
> > +       gt/intel_mocs.o \
> > +       gt/intel_sseu.o \
> > +       gt/intel_workarounds.o
> > +gt-$(CONFIG_DRM_I915_SELFTEST) += \
> > +       gt/mock_engine.o
> > +i915-y += $(gt-y)
> > +
> > +# GEM (Graphics Execution Management) code
> >  i915-y += \
> >           i915_active.o \
> >           i915_cmd_parser.o \
> > @@ -88,15 +109,6 @@ i915-y += \
> >           i915_timeline.o \
> >           i915_trace_points.o \
> >           i915_vma.o \
> > -         intel_breadcrumbs.o \
> > -         intel_context.o \
> > -         intel_engine_cs.o \
> > -         intel_hangcheck.o \
> > -         intel_lrc.o \
> > -         intel_mocs.o \
> > -         intel_ringbuffer.o \
> > -         intel_sseu.o \
> > -         intel_uncore.o \
> >           intel_wopcm.o
> >  
> >  # general-purpose microcontroller (GuC) support
> > diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
> > index 5bcc78d7ac96..96a5d90629ec 100644
> > --- a/drivers/gpu/drm/i915/Makefile.header-test
> > +++ b/drivers/gpu/drm/i915/Makefile.header-test
> > @@ -13,13 +13,11 @@ header_test := \
> >         intel_cdclk.h \
> >         intel_color.h \
> >         intel_connector.h \
> > -       intel_context_types.h \
> >         intel_crt.h \
> >         intel_csr.h \
> >         intel_ddi.h \
> >         intel_dp.h \
> >         intel_dvo.h \
> > -       intel_engine_types.h \
> >         intel_fbc.h \
> >         intel_fbdev.h \
> >         intel_frontbuffer.h \
> > @@ -33,9 +31,7 @@ header_test := \
> >         intel_psr.h \
> >         intel_sdvo.h \
> >         intel_sprite.h \
> > -       intel_sseu.h \
> > -       intel_tv.h \
> > -       intel_workarounds_types.h
> > +       intel_tv.h
> >  
> >  quiet_cmd_header_test = HDRTEST $@
> >        cmd_header_test = echo "\#include \"$(<F)\"" > $@
> > diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
> > new file mode 100644
> > index 000000000000..1c75b5c9790c
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gt/Makefile
> > @@ -0,0 +1,2 @@
> > +# Extra header tests
> > +include $(src)/Makefile.header-test
> > diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test
> > new file mode 100644
> > index 000000000000..61e06cbb4b32
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gt/Makefile.header-test
> > @@ -0,0 +1,16 @@
> > +# SPDX-License-Identifier: MIT
> > +# Copyright © 2019 Intel Corporation
> > +
> > +# Test the headers are compilable as standalone units
> > +header_test := $(notdir $(wildcard $(src)/*.h))
> > +
> > +quiet_cmd_header_test = HDRTEST $@
> > +      cmd_header_test = echo "\#include \"$(<F)\"" > $@
> > +
> > +header_test_%.c: %.h
> > +       $(call cmd,header_test)
> > +
> > +extra-$(CONFIG_DRM_I915_WERROR) += \
> > +       $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
> > +
> > +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
> > diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_breadcrumbs.c
> > rename to drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_context.c
> > rename to drivers/gpu/drm/i915/gt/intel_context.c
> > index 961d1445833d..ebd1e5919a4a 100644
> > --- a/drivers/gpu/drm/i915/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -7,8 +7,9 @@
> >  #include "i915_drv.h"
> >  #include "i915_gem_context.h"
> >  #include "i915_globals.h"
> > +
> >  #include "intel_context.h"
> > -#include "intel_ringbuffer.h"
> > +#include "intel_engine.h"
> >  
> >  static struct i915_global_context {
> >         struct i915_global base;
> > diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_context.h
> > rename to drivers/gpu/drm/i915/gt/intel_context.h
> > diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_context_types.h
> > rename to drivers/gpu/drm/i915/gt/intel_context_types.h
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_ringbuffer.h
> > rename to drivers/gpu/drm/i915/gt/intel_engine.h
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_engine_cs.c
> > rename to drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index ad2a683d97f7..21dd3f25e641 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -25,9 +25,10 @@
> >  #include <drm/drm_print.h>
> >  
> >  #include "i915_drv.h"
> > -#include "i915_reset.h"
> > -#include "intel_ringbuffer.h"
> > +
> > +#include "intel_engine.h"
> >  #include "intel_lrc.h"
> > +#include "intel_reset.h"
> >  
> >  /* Haswell does have the CXT_SIZE register however it does not appear to be
> >   * valid. Now, docs explain in dwords what is in the context object. The full
> > @@ -1756,6 +1757,5 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
> >  }
> >  
> >  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > -#include "selftests/mock_engine.c"
> > -#include "selftests/intel_engine_cs.c"
> > +#include "selftest_engine_cs.c"
> >  #endif
> > diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_engine_types.h
> > rename to drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index d07a01b3ed0b..3adf58da6d2c 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -14,15 +14,14 @@
> >  #include <linux/types.h>
> >  
> >  #include "i915_gem.h"
> > +#include "i915_gem_batch_pool.h"
> > +#include "i915_pmu.h"
> >  #include "i915_priolist_types.h"
> >  #include "i915_selftest.h"
> >  #include "i915_timeline_types.h"
> >  #include "intel_sseu.h"
> >  #include "intel_workarounds_types.h"
> >  
> > -#include "i915_gem_batch_pool.h"
> > -#include "i915_pmu.h"
> > -
> >  #define I915_MAX_SLICES        3
> >  #define I915_MAX_SUBSLICES 8
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_gpu_commands.h
> > rename to drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_hangcheck.c
> > rename to drivers/gpu/drm/i915/gt/intel_hangcheck.c
> > index 3d51ed1428d4..3053a706a561 100644
> > --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> > @@ -22,8 +22,8 @@
> >   *
> >   */
> >  
> > +#include "intel_reset.h"
> >  #include "i915_drv.h"
> > -#include "i915_reset.h"
> >  
> >  struct hangcheck {
> >         u64 acthd;
> > @@ -330,5 +330,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915)
> >  }
> >  
> >  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > -#include "selftests/intel_hangcheck.c"
> > +#include "selftest_hangcheck.c"
> >  #endif
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_lrc.c
> > rename to drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 18a9dc6ca877..5cadf8f6a23d 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -133,13 +133,12 @@
> >   */
> >  #include <linux/interrupt.h>
> >  
> > -#include <drm/i915_drm.h>
> >  #include "i915_drv.h"
> >  #include "i915_gem_render_state.h"
> > -#include "i915_reset.h"
> >  #include "i915_vgpu.h"
> >  #include "intel_lrc_reg.h"
> >  #include "intel_mocs.h"
> > +#include "intel_reset.h"
> >  #include "intel_workarounds.h"
> >  
> >  #define RING_EXECLIST_QFULL            (1 << 0x2)
> > @@ -2905,5 +2904,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
> >  }
> >  
> >  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > -#include "selftests/intel_lrc.c"
> > +#include "selftest_lrc.c"
> >  #endif
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> > similarity index 98%
> > rename from drivers/gpu/drm/i915/intel_lrc.h
> > rename to drivers/gpu/drm/i915/gt/intel_lrc.h
> > index 99f75ee9d087..1a33ec74af8c 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> > @@ -24,8 +24,7 @@
> >  #ifndef _INTEL_LRC_H_
> >  #define _INTEL_LRC_H_
> >  
> > -#include "intel_ringbuffer.h"
> > -#include "i915_gem_context.h"
> > +#include "intel_engine.h"
> >  
> >  /* Execlists regs */
> >  #define RING_ELSP(base)                                _MMIO((base) + 0x230)
> > @@ -99,7 +98,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
> >  struct drm_printer;
> >  
> >  struct drm_i915_private;
> > -struct i915_gem_context;
> >  
> >  void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_lrc_reg.h
> > rename to drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_mocs.c
> > rename to drivers/gpu/drm/i915/gt/intel_mocs.c
> > index 274ba78500c0..79df66022d3a 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > @@ -20,9 +20,11 @@
> >   * SOFTWARE.
> >   */
> >  
> > +#include "i915_drv.h"
> > +
> > +#include "intel_engine.h"
> >  #include "intel_mocs.h"
> >  #include "intel_lrc.h"
> > -#include "intel_ringbuffer.h"
> >  
> >  /* structures required */
> >  struct drm_i915_mocs_entry {
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> > similarity index 97%
> > rename from drivers/gpu/drm/i915/intel_mocs.h
> > rename to drivers/gpu/drm/i915/gt/intel_mocs.h
> > index 3d99d1271b2b..0913704a1af2 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> > @@ -49,7 +49,9 @@
> >   * context handling keep the MOCS in step.
> >   */
> >  
> > -#include "i915_drv.h"
> > +struct drm_i915_private;
> > +struct i915_request;
> > +struct intel_engine_cs;
> >  
> >  int intel_rcs_context_init_mocs(struct i915_request *rq);
> >  void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/i915_reset.c
> > rename to drivers/gpu/drm/i915/gt/intel_reset.c
> > index 677d59304e78..9731a2295639 100644
> > --- a/drivers/gpu/drm/i915/i915_reset.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> > @@ -9,7 +9,7 @@
> >  
> >  #include "i915_drv.h"
> >  #include "i915_gpu_error.h"
> > -#include "i915_reset.h"
> > +#include "intel_reset.h"
> >  
> >  #include "intel_guc.h"
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> > similarity index 98%
> > rename from drivers/gpu/drm/i915/i915_reset.h
> > rename to drivers/gpu/drm/i915/gt/intel_reset.h
> > index 3c0450289b8f..8e662bb43a9b 100644
> > --- a/drivers/gpu/drm/i915/i915_reset.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> > @@ -11,7 +11,7 @@
> >  #include <linux/types.h>
> >  #include <linux/srcu.h>
> >  
> > -#include "intel_engine_types.h"
> > +#include "gt/intel_engine_types.h"
> >  
> >  struct drm_i915_private;
> >  struct i915_request;
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_ringbuffer.c
> > rename to drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > index 029fd8ec1857..c1214fd25702 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > @@ -33,9 +33,8 @@
> >  
> >  #include "i915_drv.h"
> >  #include "i915_gem_render_state.h"
> > -#include "i915_reset.h"
> >  #include "i915_trace.h"
> > -#include "intel_drv.h"
> > +#include "intel_reset.h"
> >  #include "intel_workarounds.h"
> >  
> >  /* Rough estimate of the typical request size, performing a flush,
> > diff --git a/drivers/gpu/drm/i915/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_sseu.c
> > rename to drivers/gpu/drm/i915/gt/intel_sseu.c
> > diff --git a/drivers/gpu/drm/i915/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_sseu.h
> > rename to drivers/gpu/drm/i915/gt/intel_sseu.h
> > diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/intel_workarounds.c
> > rename to drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index b3cbed1ee1c9..f46ed0e2f07c 100644
> > --- a/drivers/gpu/drm/i915/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -1398,5 +1398,5 @@ int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
> >  }
> >  
> >  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > -#include "selftests/intel_workarounds.c"
> > +#include "selftest_workarounds.c"
> >  #endif
> > diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
> > similarity index 88%
> > rename from drivers/gpu/drm/i915/intel_workarounds.h
> > rename to drivers/gpu/drm/i915/gt/intel_workarounds.h
> > index fdf7ebb90f28..3761a6ee58bb 100644
> > --- a/drivers/gpu/drm/i915/intel_workarounds.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
> > @@ -4,13 +4,17 @@
> >   * Copyright © 2014-2018 Intel Corporation
> >   */
> >  
> > -#ifndef _I915_WORKAROUNDS_H_
> > -#define _I915_WORKAROUNDS_H_
> > +#ifndef _INTEL_WORKAROUNDS_H_
> > +#define _INTEL_WORKAROUNDS_H_
> >  
> >  #include <linux/slab.h>
> >  
> >  #include "intel_workarounds_types.h"
> >  
> > +struct drm_i915_private;
> > +struct i915_request;
> > +struct intel_engine_cs;
> > +
> >  static inline void intel_wa_list_free(struct i915_wa_list *wal)
> >  {
> >         kfree(wal->list);
> > diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/intel_workarounds_types.h
> > rename to drivers/gpu/drm/i915/gt/intel_workarounds_types.h
> > diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> > similarity index 97%
> > rename from drivers/gpu/drm/i915/selftests/mock_engine.c
> > rename to drivers/gpu/drm/i915/gt/mock_engine.c
> > index 61a8206ed677..414afd2f27fe 100644
> > --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> > +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> > @@ -22,8 +22,11 @@
> >   *
> >   */
> >  
> > +#include "i915_drv.h"
> > +#include "intel_context.h"
> > +
> >  #include "mock_engine.h"
> > -#include "mock_request.h"
> > +#include "selftests/mock_request.h"
> >  
> >  struct mock_ring {
> >         struct intel_ring base;
> > @@ -268,8 +271,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
> >         timer_setup(&engine->hw_delay, hw_delay_complete, 0);
> >         INIT_LIST_HEAD(&engine->hw_queue);
> >  
> > -       if (pin_context(i915->kernel_context, &engine->base,
> > -                       &engine->base.kernel_context))
> > +       engine->base.kernel_context =
> > +               intel_context_pin(i915->kernel_context, &engine->base);
> > +       if (IS_ERR(engine->base.kernel_context))
> >                 goto err_breadcrumbs;
> >  
> >         return &engine->base;
> > diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
> > similarity index 98%
> > rename from drivers/gpu/drm/i915/selftests/mock_engine.h
> > rename to drivers/gpu/drm/i915/gt/mock_engine.h
> > index b9cc3a245f16..44b35a85e9d1 100644
> > --- a/drivers/gpu/drm/i915/selftests/mock_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/mock_engine.h
> > @@ -29,7 +29,7 @@
> >  #include <linux/spinlock.h>
> >  #include <linux/timer.h>
> >  
> > -#include "../intel_ringbuffer.h"
> > +#include "gt/intel_engine.h"
> >  
> >  struct mock_engine {
> >         struct intel_engine_cs base;
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> > similarity index 100%
> > rename from drivers/gpu/drm/i915/selftests/intel_engine_cs.c
> > rename to drivers/gpu/drm/i915/gt/selftest_engine_cs.c
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> > rename to drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> > index 050bd1e19e02..87c26920212f 100644
> > --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> > @@ -24,14 +24,14 @@
> >  
> >  #include <linux/kthread.h>
> >  
> > -#include "../i915_selftest.h"
> > -#include "i915_random.h"
> > -#include "igt_flush_test.h"
> > -#include "igt_reset.h"
> > -#include "igt_wedge_me.h"
> > -
> > -#include "mock_context.h"
> > -#include "mock_drm.h"
> > +#include "i915_selftest.h"
> > +#include "selftests/i915_random.h"
> > +#include "selftests/igt_flush_test.h"
> > +#include "selftests/igt_reset.h"
> > +#include "selftests/igt_wedge_me.h"
> > +
> > +#include "selftests/mock_context.h"
> > +#include "selftests/mock_drm.h"
> >  
> >  #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
> >  
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > similarity index 99%
> > rename from drivers/gpu/drm/i915/selftests/intel_lrc.c
> > rename to drivers/gpu/drm/i915/gt/selftest_lrc.c
> > index fbee030db940..cd0551f97c2f 100644
> > --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > @@ -6,15 +6,13 @@
> >  
> >  #include <linux/prime_numbers.h>
> >  
> > -#include "../i915_reset.h"
> > -
> > -#include "../i915_selftest.h"
> > -#include "igt_flush_test.h"
> > -#include "igt_live_test.h"
> > -#include "igt_spinner.h"
> > -#include "i915_random.h"
> > -
> > -#include "mock_context.h"
> > +#include "gt/intel_reset.h"
> > +#include "i915_selftest.h"
> > +#include "selftests/i915_random.h"
> > +#include "selftests/igt_flush_test.h"
> > +#include "selftests/igt_live_test.h"
> > +#include "selftests/igt_spinner.h"
> > +#include "selftests/mock_context.h"
> >  
> >  static int live_sanitycheck(void *arg)
> >  {
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > similarity index 98%
> > rename from drivers/gpu/drm/i915/selftests/intel_workarounds.c
> > rename to drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > index 6f941c31dcab..96c6282f3a10 100644
> > --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > @@ -4,15 +4,15 @@
> >   * Copyright © 2018 Intel Corporation
> >   */
> >  
> > -#include "../i915_selftest.h"
> > -#include "../i915_reset.h"
> > -
> > -#include "igt_flush_test.h"
> > -#include "igt_reset.h"
> > -#include "igt_spinner.h"
> > -#include "igt_wedge_me.h"
> > -#include "mock_context.h"
> > -#include "mock_drm.h"
> > +#include "i915_selftest.h"
> > +#include "intel_reset.h"
> > +
> > +#include "selftests/igt_flush_test.h"
> > +#include "selftests/igt_reset.h"
> > +#include "selftests/igt_spinner.h"
> > +#include "selftests/igt_wedge_me.h"
> > +#include "selftests/mock_context.h"
> > +#include "selftests/mock_drm.h"
> >  
> >  static const struct wo_register {
> >         enum intel_platform platform;
> > diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> > index 503d548a55f7..e9fadcb4d592 100644
> > --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> > +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> > @@ -25,8 +25,9 @@
> >   *
> >   */
> >  
> > +#include "gt/intel_engine.h"
> > +
> >  #include "i915_drv.h"
> > -#include "intel_ringbuffer.h"
> >  
> >  /**
> >   * DOC: batch buffer command parser
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 5823ffb17821..3f039758b152 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -32,7 +32,8 @@
> >  #include <drm/drm_debugfs.h>
> >  #include <drm/drm_fourcc.h>
> >  
> > -#include "i915_reset.h"
> > +#include "gt/intel_reset.h"
> > +
> >  #include "intel_dp.h"
> >  #include "intel_drv.h"
> >  #include "intel_fbc.h"
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 1ad88e6d7c04..98b997526daa 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -47,10 +47,12 @@
> >  #include <drm/drm_probe_helper.h>
> >  #include <drm/i915_drm.h>
> >  
> > +#include "gt/intel_workarounds.h"
> > +#include "gt/intel_reset.h"
> > +
> >  #include "i915_drv.h"
> >  #include "i915_pmu.h"
> >  #include "i915_query.h"
> > -#include "i915_reset.h"
> >  #include "i915_trace.h"
> >  #include "i915_vgpu.h"
> >  #include "intel_audio.h"
> > @@ -62,7 +64,6 @@
> >  #include "intel_pm.h"
> >  #include "intel_sprite.h"
> >  #include "intel_uc.h"
> > -#include "intel_workarounds.h"
> >  
> >  static struct drm_driver driver;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 7b5da9eddc1c..fad5306f07da 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -62,18 +62,19 @@
> >  #include "i915_reg.h"
> >  #include "i915_utils.h"
> >  
> > +#include "gt/intel_lrc.h"
> > +#include "gt/intel_engine.h"
> > +#include "gt/intel_workarounds.h"
> > +
> >  #include "intel_bios.h"
> >  #include "intel_device_info.h"
> >  #include "intel_display.h"
> >  #include "intel_dpll_mgr.h"
> >  #include "intel_frontbuffer.h"
> > -#include "intel_lrc.h"
> >  #include "intel_opregion.h"
> > -#include "intel_ringbuffer.h"
> >  #include "intel_uc.h"
> >  #include "intel_uncore.h"
> >  #include "intel_wopcm.h"
> > -#include "intel_workarounds.h"
> >  
> >  #include "i915_gem.h"
> >  #include "i915_gem_context.h"
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index a5412323fee1..9554960977a3 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -39,19 +39,20 @@
> >  #include <linux/dma-buf.h>
> >  #include <linux/mman.h>
> >  
> > +#include "gt/intel_mocs.h"
> > +#include "gt/intel_reset.h"
> > +#include "gt/intel_workarounds.h"
> > +
> >  #include "i915_drv.h"
> >  #include "i915_gem_clflush.h"
> >  #include "i915_gemfs.h"
> >  #include "i915_globals.h"
> > -#include "i915_reset.h"
> >  #include "i915_trace.h"
> >  #include "i915_vgpu.h"
> >  
> >  #include "intel_drv.h"
> >  #include "intel_frontbuffer.h"
> > -#include "intel_mocs.h"
> >  #include "intel_pm.h"
> > -#include "intel_workarounds.h"
> >  
> >  static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index c02a30612df9..37dff694456c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -86,13 +86,16 @@
> >   */
> >  
> >  #include <linux/log2.h>
> > +
> >  #include <drm/i915_drm.h>
> > +
> > +#include "gt/intel_lrc_reg.h"
> > +#include "gt/intel_workarounds.h"
> > +
> >  #include "i915_drv.h"
> >  #include "i915_globals.h"
> >  #include "i915_trace.h"
> >  #include "i915_user_extensions.h"
> > -#include "intel_lrc_reg.h"
> > -#include "intel_workarounds.h"
> >  
> >  #define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
> >  #define I915_CONTEXT_PARAM_VM 0x9
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 23dcb01bfd82..cec278ab04e2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -27,9 +27,10 @@
> >  
> >  #include "i915_gem_context_types.h"
> >  
> > +#include "gt/intel_context.h"
> > +
> >  #include "i915_gem.h"
> >  #include "i915_scheduler.h"
> > -#include "intel_context.h"
> >  #include "intel_device_info.h"
> >  
> >  struct drm_device;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> > index e2ec58b10fb2..d282a6ab3b9f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> > @@ -17,8 +17,9 @@
> >  #include <linux/rcupdate.h>
> >  #include <linux/types.h>
> >  
> > +#include "gt/intel_context_types.h"
> > +
> >  #include "i915_scheduler.h"
> > -#include "intel_context_types.h"
> >  
> >  struct pid;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 8f460cc4cc1f..aab778728ea2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -37,7 +37,6 @@
> >  
> >  #include "i915_drv.h"
> >  #include "i915_vgpu.h"
> > -#include "i915_reset.h"
> >  #include "i915_trace.h"
> >  #include "intel_drv.h"
> >  #include "intel_frontbuffer.h"
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index f597f35b109b..c8d96e91f3dc 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -38,8 +38,8 @@
> >  #include <linux/mm.h>
> >  #include <linux/pagevec.h>
> >  
> > +#include "gt/intel_reset.h"
> >  #include "i915_request.h"
> > -#include "i915_reset.h"
> >  #include "i915_selftest.h"
> >  #include "i915_timeline.h"
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> > index 5dc761e85d9d..b419d0f59275 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> > @@ -13,8 +13,9 @@
> >  
> >  #include <drm/drm_mm.h>
> >  
> > +#include "gt/intel_engine.h"
> > +
> >  #include "intel_device_info.h"
> > -#include "intel_ringbuffer.h"
> >  #include "intel_uc_fw.h"
> >  
> >  #include "i915_gem.h"
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> > index 56da457bed21..a87f790335c1 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -195,6 +195,8 @@
> >  #include <linux/sizes.h>
> >  #include <linux/uuid.h>
> >  
> > +#include "gt/intel_lrc_reg.h"
> > +
> >  #include "i915_drv.h"
> >  #include "i915_oa_hsw.h"
> >  #include "i915_oa_bdw.h"
> > @@ -210,7 +212,6 @@
> >  #include "i915_oa_cflgt3.h"
> >  #include "i915_oa_cnl.h"
> >  #include "i915_oa_icl.h"
> > -#include "intel_lrc_reg.h"
> >  
> >  /* HW requires this to be a power of two, between 128k and 16M, though driver
> >   * is currently generally designed assuming the largest 16M size is used such
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index 46a52da3db29..35e502481f29 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -6,8 +6,10 @@
> >  
> >  #include <linux/irq.h>
> >  #include <linux/pm_runtime.h>
> > +
> > +#include "gt/intel_engine.h"
> > +
> >  #include "i915_pmu.h"
> > -#include "intel_ringbuffer.h"
> >  #include "i915_drv.h"
> >  
> >  /* Frequency for the sampling timer for events which need it. */
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index e0efc334463b..74ae698c1f95 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -32,7 +32,6 @@
> >  #include "i915_active.h"
> >  #include "i915_drv.h"
> >  #include "i915_globals.h"
> > -#include "i915_reset.h"
> >  #include "intel_pm.h"
> >  
> >  struct execute_cb {
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> > index f1af3916a808..166a457884b2 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> > +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> > @@ -9,8 +9,8 @@
> >  
> >  #include <linux/list.h>
> >  
> > +#include "gt/intel_engine_types.h"
> >  #include "i915_priolist_types.h"
> > -#include "intel_engine_types.h"
> >  
> >  struct drm_i915_private;
> >  struct i915_request;
> > diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> > index 12893304c8f8..b5286f3d8146 100644
> > --- a/drivers/gpu/drm/i915/i915_trace.h
> > +++ b/drivers/gpu/drm/i915/i915_trace.h
> > @@ -8,9 +8,10 @@
> >  
> >  #include <drm/drm_drv.h>
> >  
> > +#include "gt/intel_engine.h"
> > +
> >  #include "i915_drv.h"
> >  #include "intel_drv.h"
> > -#include "intel_ringbuffer.h"
> >  
> >  #undef TRACE_SYSTEM
> >  #define TRACE_SYSTEM i915
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 36726392e737..d4d308b6d1d8 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -22,11 +22,12 @@
> >   *
> >   */
> >  
> > +#include "gt/intel_engine.h"
> > +
> >  #include "i915_vma.h"
> >  
> >  #include "i915_drv.h"
> >  #include "i915_globals.h"
> > -#include "intel_ringbuffer.h"
> >  #include "intel_frontbuffer.h"
> >  
> >  #include <drm/drm_gem.h>
> > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> > index 3045e0dee2a1..aa89a9adeffb 100644
> > --- a/drivers/gpu/drm/i915/intel_device_info.h
> > +++ b/drivers/gpu/drm/i915/intel_device_info.h
> > @@ -27,9 +27,11 @@
> >  
> >  #include <uapi/drm/i915_drm.h>
> >  
> > -#include "intel_engine_types.h"
> > +#include "gt/intel_engine_types.h"
> > +#include "gt/intel_context_types.h"
> > +#include "gt/intel_sseu.h"
> > +
> >  #include "intel_display.h"
> > -#include "intel_sseu.h"
> >  
> >  struct drm_printer;
> >  struct drm_i915_private;
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 3bd40a4a6739..24e70d46b872 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -46,7 +46,6 @@
> >  
> >  #include "i915_drv.h"
> >  #include "i915_gem_clflush.h"
> > -#include "i915_reset.h"
> >  #include "i915_trace.h"
> >  #include "intel_atomic_plane.h"
> >  #include "intel_color.h"
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 37f60cb8e9e1..1b6d6403ee92 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -25,8 +25,9 @@
> >  #include <linux/circ_buf.h>
> >  #include <trace/events/dma_fence.h>
> >  
> > +#include "gt/intel_lrc_reg.h"
> > +
> >  #include "intel_guc_submission.h"
> > -#include "intel_lrc_reg.h"
> >  #include "i915_drv.h"
> >  
> >  #define GUC_PREEMPT_FINISHED           0x1
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
> > index aa5e6749c925..7d823a513b9c 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.h
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.h
> > @@ -27,9 +27,10 @@
> >  
> >  #include <linux/spinlock.h>
> >  
> > +#include "gt/intel_engine_types.h"
> > +
> >  #include "i915_gem.h"
> >  #include "i915_selftest.h"
> > -#include "intel_engine_types.h"
> >  
> >  struct drm_i915_private;
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> > index 25b80ffe71ad..13f823ff8083 100644
> > --- a/drivers/gpu/drm/i915/intel_uc.c
> > +++ b/drivers/gpu/drm/i915/intel_uc.c
> > @@ -22,11 +22,11 @@
> >   *
> >   */
> >  
> > +#include "gt/intel_reset.h"
> >  #include "intel_uc.h"
> >  #include "intel_guc_submission.h"
> >  #include "intel_guc.h"
> >  #include "i915_drv.h"
> > -#include "i915_reset.h"
> >  
> >  static void guc_free_load_err_log(struct intel_guc *guc);
> >  
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > index e1cb22f03e8e..6f52ca881173 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> > @@ -24,8 +24,9 @@
> >  
> >  #include <linux/prime_numbers.h>
> >  
> > -#include "../i915_reset.h"
> > -#include "../i915_selftest.h"
> > +#include "gt/intel_reset.h"
> > +#include "i915_selftest.h"
> > +
> >  #include "i915_random.h"
> >  #include "igt_flush_test.h"
> >  #include "igt_live_test.h"
> > diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
> > index 208a966da8ca..4f31b137c428 100644
> > --- a/drivers/gpu/drm/i915/selftests/igt_reset.c
> > +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
> > @@ -6,8 +6,9 @@
> >  
> >  #include "igt_reset.h"
> >  
> > +#include "gt/intel_engine.h"
> > +
> >  #include "../i915_drv.h"
> > -#include "../intel_ringbuffer.h"
> >  
> >  void igt_global_reset_lock(struct drm_i915_private *i915)
> >  {
> > diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
> > index 391777c76dc7..d312e7cdab68 100644
> > --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
> > +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
> > @@ -9,9 +9,10 @@
> >  
> >  #include "../i915_selftest.h"
> >  
> > +#include "gt/intel_engine.h"
> > +
> >  #include "../i915_drv.h"
> >  #include "../i915_request.h"
> > -#include "../intel_ringbuffer.h"
> >  #include "../i915_gem_context.h"
> >  
> >  struct igt_spinner {
> > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> > index 60bbf8b4df40..f444ee5add27 100644
> > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> > @@ -25,7 +25,8 @@
> >  #include <linux/pm_domain.h>
> >  #include <linux/pm_runtime.h>
> >  
> > -#include "mock_engine.h"
> > +#include "gt/mock_engine.h"
> > +
> >  #include "mock_context.h"
> >  #include "mock_request.h"
> >  #include "mock_gem_device.h"
> > diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
> > index d1a7c9608712..f739ba63057f 100644
> > --- a/drivers/gpu/drm/i915/selftests/mock_request.c
> > +++ b/drivers/gpu/drm/i915/selftests/mock_request.c
> > @@ -22,7 +22,8 @@
> >   *
> >   */
> >  
> > -#include "mock_engine.h"
> > +#include "gt/mock_engine.h"
> > +
> >  #include "mock_request.h"
> >  
> >  struct i915_request *
> > -- 
> > 2.20.1
> > 

* Re: [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/
  2019-04-23  8:57     ` Joonas Lahtinen
@ 2019-04-23  9:40       ` Jani Nikula
  2019-04-23 16:46         ` Rodrigo Vivi
  0 siblings, 1 reply; 68+ messages in thread
From: Jani Nikula @ 2019-04-23  9:40 UTC (permalink / raw)
  To: Joonas Lahtinen, Chris Wilson, Rodrigo Vivi, intel-gfx


I'll want two things:

* Explicit ack from Rodrigo too

* The dependencies merged first, and this one posted as a single
  patch. I really want this to stand out better, instead of being
  semi-hidden in the middle of a 30+ patch series.


Acked-by: Jani Nikula <jani.nikula@intel.com>


On Tue, 23 Apr 2019, Joonas Lahtinen <joonas.lahtinen@linux.intel.com> wrote:
> Quoting Joonas Lahtinen (2019-04-18 15:04:49)
>> + Jani and Rodrigo to comment
>
> No objection here and drm-intel-next was freshly tagged, so this is:
>
> Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Regards, Joonas
>
>> 
>> I'm definitely all for doing this, so it's only a matter of timing.
>> 
>> The question is: do we want to do it right now, after the last
>> drm-intel-next was tagged, or do we want to wait a couple of release
>> candidates?
>> 
>> I'm leaning towards doing this ASAP, as git cherry-pick should
>> understand that they're just renames, so there should be no issue with
>> doing the -fixes.
>> 
>> Regards, Joonas
>> 
>> Quoting Chris Wilson (2019-04-17 10:56:32)
>> > Start partitioning off the code that talks to the hardware (GT) from the
>> > uapi layers and move the device facing code under gt/
>> > 
>> > One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to
>> > subdivide that header and body further (and split out the submission
>> > code from the ringbuffer and logical context handling). This patch aims
>> > to be simple motion so git can fixup inflight patches with little mess.
>> > 
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

-- 
Jani Nikula, Intel Open Source Graphics Center

* Re: [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling
  2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
                   ` (33 preceding siblings ...)
  2019-04-18 10:32 ` [PATCH 01/32] " Tvrtko Ursulin
@ 2019-04-23 12:59 ` Tvrtko Ursulin
  34 siblings, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-23 12:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> Currently there is an underlying assumption that i915_request_unsubmit()
> is synchronous wrt the GPU -- that is the request is no longer in flight
> as we remove it. In the near future that may change, and this may upset
> our signaling as we can process an interrupt for that request while it
> is no longer in flight.
> 
> CPU0					CPU1
> intel_engine_breadcrumbs_irq
> (queue request completion)
> 					i915_request_cancel_signaling
> ...					...
> 					i915_request_enable_signaling
> dma_fence_signal
> 
> Hence in the time it took us to drop the lock to signal the request, a
> preemption event may have occurred and re-queued the request. In the
> process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
> so reused the rq->signal_link that was in use on CPU0, leading to bad
> pointer chasing in intel_engine_breadcrumbs_irq.
> 
> A related issue was that if someone started listening for a signal on a
> completed but no longer in-flight request, we missed the opportunity to
> immediately signal that request.
> 
> Furthermore, as intel_contexts may be immediately released during
> request retirement, in order to be entirely sure that
> intel_engine_breadcrumbs_irq may no longer dereference the intel_context
> (ce->signals and ce->signal_link), we must wait for irq spinlock.
> 
> In order to prevent the race, we use a bit in the fence.flags to signal
> the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
> For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
> quickly signals to any outside observer that the fence is indeed signaled.
> 
> Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/dma-buf/dma-fence.c              |  1 +
>   drivers/gpu/drm/i915/i915_request.c      |  1 +
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 52 ++++++++++++++----------
>   3 files changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
> index 3aa8733f832a..9bf06042619a 100644
> --- a/drivers/dma-buf/dma-fence.c
> +++ b/drivers/dma-buf/dma-fence.c
> @@ -29,6 +29,7 @@
>   
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
>   EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
> +EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
>   
>   static DEFINE_SPINLOCK(dma_fence_stub_lock);
>   static struct dma_fence dma_fence_stub;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index b836721d3b13..e0efc334463b 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -432,6 +432,7 @@ void __i915_request_submit(struct i915_request *request)
>   	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
>   
>   	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> +	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
>   	    !i915_request_enable_breadcrumb(request))
>   		intel_engine_queue_breadcrumbs(engine);
>   
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 3cbffd400b1b..e19f84b006cc 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/kthread.h>
> +#include <trace/events/dma_fence.h>
>   #include <uapi/linux/sched/types.h>
>   
>   #include "i915_drv.h"
> @@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
>   void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   {
>   	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	const ktime_t timestamp = ktime_get();
>   	struct intel_context *ce, *cn;
>   	struct list_head *pos, *next;
>   	LIST_HEAD(signal);
> @@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   
>   			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
>   					     &rq->fence.flags));
> +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +
> +			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +					     &rq->fence.flags))
> +				continue;

From here downwards we have intimate coupling with the dma_fence_signal
implementation, by open-coding it (with some optimisations as well).

I am thinking about this solution: here we put:

	if (!__dma_fence_maybe_signal(&rq->fence))
		continue;

Add the low-level helper to dma-fence.c and export it.
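
For illustration, a minimal sketch of what such a helper might look
like (the name and exact shape are only my suggestion, not an existing
dma-fence API):

	/*
	 * Hypothetical helper: atomically claim the right to signal
	 * the fence. Returns false if the fence was already signaled,
	 * so the caller can simply skip it.
	 */
	bool __dma_fence_maybe_signal(struct dma_fence *fence)
	{
		return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
					 &fence->flags);
	}
	EXPORT_SYMBOL(__dma_fence_maybe_signal);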

And below..

>   
>   			/*
>   			 * Queue for execution after dropping the signaling
> @@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   			 * more signalers to the same context or engine.
>   			 */
>   			i915_request_get(rq);
> -
> -			/*
> -			 * We may race with direct invocation of
> -			 * dma_fence_signal(), e.g. i915_request_retire(),
> -			 * so we need to acquire our reference to the request
> -			 * before we cancel the breadcrumb.
> -			 */
> -			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   			list_add_tail(&rq->signal_link, &signal);
>   		}
>   
> @@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
>   	list_for_each_safe(pos, next, &signal) {
>   		struct i915_request *rq =
>   			list_entry(pos, typeof(*rq), signal_link);
> +		struct dma_fence_cb *cur, *tmp;
> +
> +		trace_dma_fence_signaled(&rq->fence);
> +
> +		rq->fence.timestamp = timestamp;
> +		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
> +
> +		spin_lock(&rq->lock);
> +		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
> +			INIT_LIST_HEAD(&cur->node);
> +			cur->func(&rq->fence, cur);
> +		}
> +		INIT_LIST_HEAD(&rq->fence.cb_list);
> +		spin_unlock(&rq->lock);

..we do:

   __dma_fence_complete/force/finish_signal(&rq->fence, timestamp);

Again added to dma-fence.c and exported, and called instead of the whole
above block.
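
As a rough sketch only (the name and signature are placeholders on my
part), that helper would essentially lift the quoted block above into
dma-fence.c:

	/*
	 * Hypothetical helper: finish signaling a fence whose
	 * SIGNALED bit has already been claimed; record the
	 * timestamp, emit the tracepoint and run the pending
	 * callbacks under the fence lock.
	 */
	void __dma_fence_complete(struct dma_fence *fence, ktime_t timestamp)
	{
		struct dma_fence_cb *cur, *tmp;

		trace_dma_fence_signaled(fence);

		fence->timestamp = timestamp;
		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);

		spin_lock(fence->lock);
		list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
			INIT_LIST_HEAD(&cur->node);
			cur->func(fence, cur);
		}
		INIT_LIST_HEAD(&fence->cb_list);
		spin_unlock(fence->lock);
	}
	EXPORT_SYMBOL(__dma_fence_complete);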

This way it is self-documenting in our code, and we remove the intimate
coupling with dma-fence implementation details.

No need to export the tracepoint then either.

(You can have a prequel patch to optimise the list_del_init in 
dma_fence_signal.)
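
(By which I mean something along these lines, assuming the current loop
in dma_fence_signal does a list_del_init per callback; sketch only:

	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
		INIT_LIST_HEAD(&cur->node);	/* instead of list_del_init */
		cur->func(fence, cur);
	}
	INIT_LIST_HEAD(&fence->cb_list);	/* empty the list in one go */

i.e. reinitialise each node and the list head directly, as the quoted
i915 block already does.)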

Thoughts?

Regards,

Tvrtko

>   
> -		dma_fence_signal(&rq->fence);
>   		i915_request_put(rq);
>   	}
>   }
> @@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>   
>   bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   {
> -	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
> -
> -	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> -
> -	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> -		return true;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
> -	spin_lock(&b->irq_lock);
> -	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
> -	    !__request_completed(rq)) {
> +	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
> +		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   		struct intel_context *ce = rq->hw_context;
>   		struct list_head *pos;
>   
> +		spin_lock(&b->irq_lock);
> +		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
> +
>   		__intel_breadcrumbs_arm_irq(b);
>   
>   		/*
> @@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   			list_move_tail(&ce->signal_link, &b->signalers);
>   
>   		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +		spin_unlock(&b->irq_lock);
>   	}
> -	spin_unlock(&b->irq_lock);
>   
>   	return !__request_completed(rq);
>   }
> @@ -294,8 +304,8 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>   {
>   	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>   
> -	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> -		return;
> +	lockdep_assert_held(&rq->lock);
> +	lockdep_assert_irqs_disabled();
>   
>   	spin_lock(&b->irq_lock);
>   	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 68+ messages in thread

* Re: [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy
  2019-04-17  7:56 ` [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy Chris Wilson
  2019-04-18 12:42   ` Tvrtko Ursulin
@ 2019-04-23 13:02   ` Tvrtko Ursulin
  1 sibling, 0 replies; 68+ messages in thread
From: Tvrtko Ursulin @ 2019-04-23 13:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/04/2019 08:56, Chris Wilson wrote:
> In the current scheme, on submitting a request we take a single global
> GEM wakeref, which trickles down to wake up all GT power domains. This
> is undesirable as we would like to be able to localise our power
> management to the available power domains and to remove the global GEM
> operations from the heart of the driver. (The intent there is to push
> global GEM decisions to the boundary as used by the GEM user interface.)
> 
> Now during request construction, each request is responsible, via its
> logical context, for acquiring a wakeref on each power domain it intends
> to utilize. Currently, each request takes a wakeref on the engine(s) and
> the engines themselves take a chipset wakeref. This gives us a
> transition on each engine which we can extend if we want to insert more
> power management control (such as soft rc6). The global GEM operations
> that currently require a struct_mutex are reduced to listening to pm
> events from the chipset GT wakeref. As we reduce the struct_mutex
> requirement, these listeners should evaporate.
> 
> Perhaps the biggest immediate change is that this removes the
> struct_mutex requirement around GT power management, allowing us greater
> flexibility in request construction. Another important knock-on effect
> is that by tracking engine usage, we can insert a switch back to the
> kernel context on that engine immediately, avoiding any extra delay or
> inserting global synchronisation barriers. This makes tracking when an
> engine and its associated contexts are idle much easier -- important for
> when we forgo our assumed execution ordering and need idle barriers to
> unpin used contexts. In the process, it means we remove a large chunk of
> code whose only purpose was to switch back to the kernel context.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Imre Deak <imre.deak@intel.com>
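
All the intel_engine_pm_get/put and intel_gt_pm_get/put calls below
funnel through the intel_wakeref pattern. Roughly, as a simplified
sketch on my part (the real implementation also threads the runtime-pm
wakeref through and copes with the park callback failing):

	static void wakeref_get(struct intel_wakeref *wf,
				int (*unpark)(struct intel_wakeref *wf))
	{
		if (atomic_inc_not_zero(&wf->count))
			return;			/* already awake: fast path */

		mutex_lock(&wf->mutex);
		if (!atomic_read(&wf->count))
			unpark(wf);		/* 0 -> 1: power the unit up */
		atomic_inc(&wf->count);
		mutex_unlock(&wf->mutex);
	}

	static void wakeref_put(struct intel_wakeref *wf,
				int (*park)(struct intel_wakeref *wf))
	{
		if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex)) {
			park(wf);		/* last user: power down */
			mutex_unlock(&wf->mutex);
		}
	}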

There will be some inconsequential changes after rebase, but in principle:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> ---
>   drivers/gpu/drm/i915/Makefile                 |   2 +
>   drivers/gpu/drm/i915/gt/intel_context.c       |  18 +-
>   drivers/gpu/drm/i915/gt/intel_engine.h        |   9 +-
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 142 +---------
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 153 ++++++++++
>   drivers/gpu/drm/i915/gt/intel_engine_pm.h     |  20 ++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 +-
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c         | 143 ++++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_pm.h         |  27 ++
>   drivers/gpu/drm/i915/gt/intel_hangcheck.c     |   7 +
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |   6 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c         | 101 +------
>   drivers/gpu/drm/i915/gt/intel_reset.h         |   1 -
>   drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  16 +-
>   drivers/gpu/drm/i915/gt/mock_engine.c         |   3 +
>   drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  49 +---
>   .../gpu/drm/i915/gt/selftest_workarounds.c    |   5 +-
>   drivers/gpu/drm/i915/i915_debugfs.c           |  16 +-
>   drivers/gpu/drm/i915/i915_drv.c               |   5 +-
>   drivers/gpu/drm/i915/i915_drv.h               |   8 +-
>   drivers/gpu/drm/i915/i915_gem.c               |  41 +--
>   drivers/gpu/drm/i915/i915_gem.h               |   3 -
>   drivers/gpu/drm/i915/i915_gem_context.c       |  85 +-----
>   drivers/gpu/drm/i915/i915_gem_context.h       |   4 -
>   drivers/gpu/drm/i915/i915_gem_evict.c         |  47 +---
>   drivers/gpu/drm/i915/i915_gem_pm.c            | 264 ++++++------------
>   drivers/gpu/drm/i915/i915_gem_pm.h            |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.h         |   4 -
>   drivers/gpu/drm/i915/i915_request.c           |  10 +-
>   drivers/gpu/drm/i915/i915_request.h           |   2 +-
>   drivers/gpu/drm/i915/intel_uc.c               |  22 +-
>   drivers/gpu/drm/i915/intel_uc.h               |   2 +-
>   drivers/gpu/drm/i915/selftests/i915_gem.c     |  16 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 114 +-------
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  29 +-
>   .../gpu/drm/i915/selftests/igt_flush_test.c   |  32 ++-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  15 +-
>   37 files changed, 598 insertions(+), 833 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pm.h
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 858642c7bc40..dd8d923aa1c6 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -71,6 +71,8 @@ gt-y += \
>   	gt/intel_breadcrumbs.o \
>   	gt/intel_context.o \
>   	gt/intel_engine_cs.o \
> +	gt/intel_engine_pm.o \
> +	gt/intel_gt_pm.o \
>   	gt/intel_hangcheck.o \
>   	gt/intel_lrc.o \
>   	gt/intel_reset.o \
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 4410e20e8e13..298e463ad082 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -10,6 +10,7 @@
>   
>   #include "intel_context.h"
>   #include "intel_engine.h"
> +#include "intel_engine_pm.h"
>   
>   static struct i915_global_context {
>   	struct i915_global base;
> @@ -162,7 +163,11 @@ intel_context_pin(struct i915_gem_context *ctx,
>   		return ERR_PTR(-EINTR);
>   
>   	if (likely(!atomic_read(&ce->pin_count))) {
> -		err = ce->ops->pin(ce);
> +		intel_wakeref_t wakeref;
> +
> +		err = 0;
> +		with_intel_runtime_pm(ce->engine->i915, wakeref)
> +			err = ce->ops->pin(ce);
>   		if (err)
>   			goto err;
>   
> @@ -269,17 +274,10 @@ int __init i915_global_context_init(void)
>   
>   void intel_context_enter_engine(struct intel_context *ce)
>   {
> -	struct drm_i915_private *i915 = ce->gem_context->i915;
> -
> -	if (!i915->gt.active_requests++)
> -		i915_gem_unpark(i915);
> +	intel_engine_pm_get(ce->engine);
>   }
>   
>   void intel_context_exit_engine(struct intel_context *ce)
>   {
> -	struct drm_i915_private *i915 = ce->gem_context->i915;
> -
> -	GEM_BUG_ON(!i915->gt.active_requests);
> -	if (!--i915->gt.active_requests)
> -		i915_gem_park(i915);
> +	intel_engine_pm_put(ce->engine);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 72c7c337ace9..a228dc1774d8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -382,6 +382,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
>   void intel_engine_get_instdone(struct intel_engine_cs *engine,
>   			       struct intel_instdone *instdone);
>   
> +void intel_engine_init_execlists(struct intel_engine_cs *engine);
> +
>   void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>   void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
>   
> @@ -458,19 +460,14 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
>   {
>   	if (engine->reset.reset)
>   		engine->reset.reset(engine, stalled);
> +	engine->serial++; /* contexts lost */
>   }
>   
> -void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
> -void intel_gt_resume(struct drm_i915_private *i915);
> -
>   bool intel_engine_is_idle(struct intel_engine_cs *engine);
>   bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>   
>   void intel_engine_lost_context(struct intel_engine_cs *engine);
>   
> -void intel_engines_park(struct drm_i915_private *i915);
> -void intel_engines_unpark(struct drm_i915_private *i915);
> -
>   void intel_engines_reset_default_submission(struct drm_i915_private *i915);
>   unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 21dd3f25e641..268dfb8e16ff 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -27,6 +27,7 @@
>   #include "i915_drv.h"
>   
>   #include "intel_engine.h"
> +#include "intel_engine_pm.h"
>   #include "intel_lrc.h"
>   #include "intel_reset.h"
>   
> @@ -451,7 +452,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>   	i915_gem_batch_pool_init(&engine->batch_pool, engine);
>   }
>   
> -static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> +void intel_engine_init_execlists(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   
> @@ -584,10 +585,11 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
>   	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
>   
>   	intel_engine_init_breadcrumbs(engine);
> -	intel_engine_init_execlist(engine);
> +	intel_engine_init_execlists(engine);
>   	intel_engine_init_hangcheck(engine);
>   	intel_engine_init_batch_pool(engine);
>   	intel_engine_init_cmd_parser(engine);
> +	intel_engine_init__pm(engine);
>   
>   	/* Use the whole device by default */
>   	engine->sseu =
> @@ -758,30 +760,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>   	return ret;
>   }
>   
> -void intel_gt_resume(struct drm_i915_private *i915)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	/*
> -	 * After resume, we may need to poke into the pinned kernel
> -	 * contexts to paper over any damage caused by the sudden suspend.
> -	 * Only the kernel contexts should remain pinned over suspend,
> -	 * allowing us to fixup the user contexts on their first pin.
> -	 */
> -	for_each_engine(engine, i915, id) {
> -		struct intel_context *ce;
> -
> -		ce = engine->kernel_context;
> -		if (ce)
> -			ce->ops->reset(ce);
> -
> -		ce = engine->preempt_context;
> -		if (ce)
> -			ce->ops->reset(ce);
> -	}
> -}
> -
>   /**
>    * intel_engines_cleanup_common - cleans up the engine state created by
>    *                                the common initiailizers.
> @@ -1128,117 +1106,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>   		engine->set_default_submission(engine);
>   }
>   
> -static bool reset_engines(struct drm_i915_private *i915)
> -{
> -	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
> -		return false;
> -
> -	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
> -}
> -
> -/**
> - * intel_engines_sanitize: called after the GPU has lost power
> - * @i915: the i915 device
> - * @force: ignore a failed reset and sanitize engine state anyway
> - *
> - * Anytime we reset the GPU, either with an explicit GPU reset or through a
> - * PCI power cycle, the GPU loses state and we must reset our state tracking
> - * to match. Note that calling intel_engines_sanitize() if the GPU has not
> - * been reset results in much confusion!
> - */
> -void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	GEM_TRACE("\n");
> -
> -	if (!reset_engines(i915) && !force)
> -		return;
> -
> -	for_each_engine(engine, i915, id)
> -		intel_engine_reset(engine, false);
> -}
> -
> -/**
> - * intel_engines_park: called when the GT is transitioning from busy->idle
> - * @i915: the i915 device
> - *
> - * The GT is now idle and about to go to sleep (maybe never to wake again?).
> - * Time for us to tidy and put away our toys (release resources back to the
> - * system).
> - */
> -void intel_engines_park(struct drm_i915_private *i915)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, i915, id) {
> -		/* Flush the residual irq tasklets first. */
> -		intel_engine_disarm_breadcrumbs(engine);
> -		tasklet_kill(&engine->execlists.tasklet);
> -
> -		/*
> -		 * We are committed now to parking the engines, make sure there
> -		 * will be no more interrupts arriving later and the engines
> -		 * are truly idle.
> -		 */
> -		if (wait_for(intel_engine_is_idle(engine), 10)) {
> -			struct drm_printer p = drm_debug_printer(__func__);
> -
> -			dev_err(i915->drm.dev,
> -				"%s is not idle before parking\n",
> -				engine->name);
> -			intel_engine_dump(engine, &p, NULL);
> -		}
> -
> -		/* Must be reset upon idling, or we may miss the busy wakeup. */
> -		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> -
> -		if (engine->park)
> -			engine->park(engine);
> -
> -		if (engine->pinned_default_state) {
> -			i915_gem_object_unpin_map(engine->default_state);
> -			engine->pinned_default_state = NULL;
> -		}
> -
> -		i915_gem_batch_pool_fini(&engine->batch_pool);
> -		engine->execlists.no_priolist = false;
> -	}
> -
> -	i915->gt.active_engines = 0;
> -}
> -
> -/**
> - * intel_engines_unpark: called when the GT is transitioning from idle->busy
> - * @i915: the i915 device
> - *
> - * The GT was idle and now about to fire up with some new user requests.
> - */
> -void intel_engines_unpark(struct drm_i915_private *i915)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, i915, id) {
> -		void *map;
> -
> -		/* Pin the default state for fast resets from atomic context. */
> -		map = NULL;
> -		if (engine->default_state)
> -			map = i915_gem_object_pin_map(engine->default_state,
> -						      I915_MAP_WB);
> -		if (!IS_ERR_OR_NULL(map))
> -			engine->pinned_default_state = map;
> -
> -		if (engine->unpark)
> -			engine->unpark(engine);
> -
> -		intel_engine_init_hangcheck(engine);
> -	}
> -}
> -
>   /**
>    * intel_engine_lost_context: called when the GPU is reset into unknown state
>    * @engine: the engine
> @@ -1523,6 +1390,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   	if (i915_reset_failed(engine->i915))
>   		drm_printf(m, "*** WEDGED ***\n");
>   
> +	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
>   	drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
>   		   engine->hangcheck.last_seqno,
>   		   engine->hangcheck.next_seqno,
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> new file mode 100644
> index 000000000000..cc0adfa14947
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -0,0 +1,153 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +
> +#include "intel_engine.h"
> +#include "intel_engine_pm.h"
> +#include "intel_gt_pm.h"
> +
> +static int intel_engine_unpark(struct intel_wakeref *wf)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(wf, typeof(*engine), wakeref);
> +	void *map;
> +
> +	GEM_TRACE("%s\n", engine->name);
> +
> +	intel_gt_pm_get(engine->i915);
> +
> +	/* Pin the default state for fast resets from atomic context. */
> +	map = NULL;
> +	if (engine->default_state)
> +		map = i915_gem_object_pin_map(engine->default_state,
> +					      I915_MAP_WB);
> +	if (!IS_ERR_OR_NULL(map))
> +		engine->pinned_default_state = map;
> +
> +	if (engine->unpark)
> +		engine->unpark(engine);
> +
> +	intel_engine_init_hangcheck(engine);
> +	return 0;
> +}
> +
> +void intel_engine_pm_get(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_get(engine->i915, &engine->wakeref, intel_engine_unpark);
> +}
> +
> +static bool switch_to_kernel_context(struct intel_engine_cs *engine)
> +{
> +	struct i915_request *rq;
> +
> +	/* Already inside the kernel context, safe to power down. */
> +	if (engine->wakeref_serial == engine->serial)
> +		return true;
> +
> +	/* GPU is pointing to the void, as good as in the kernel context. */
> +	if (i915_reset_failed(engine->i915))
> +		return true;
> +
> +	/*
> +	 * Note, we do this without taking the timeline->mutex. We cannot
> +	 * as we may be called while retiring the kernel context and so
> +	 * already underneath the timeline->mutex. Instead we rely on the
> +	 * exclusive property of the intel_engine_park that prevents anyone
> +	 * else from creating a request on this engine. This also requires
> +	 * that the ring is empty and we avoid any waits while constructing
> +	 * the context, as they assume protection by the timeline->mutex.
> +	 * This should hold true as we can only park the engine after
> +	 * retiring the last request, thus all rings should be empty and
> +	 * all timelines idle.
> +	 */
> +	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
> +	if (IS_ERR(rq))
> +		/* Context switch failed, hope for the best! Maybe reset? */
> +		return true;
> +
> +	/* Check again on the next retirement. */
> +	engine->wakeref_serial = engine->serial + 1;
> +	__i915_request_commit(rq);
> +
> +	return false;
> +}
> +
> +static int intel_engine_park(struct intel_wakeref *wf)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(wf, typeof(*engine), wakeref);
> +
> +	/*
> +	 * If one and only one request is completed between pm events,
> +	 * we know that we are inside the kernel context and it is
> +	 * safe to power down. (We are paranoid in case that runtime
> +	 * suspend causes corruption to the active context image, and
> +	 * want to avoid that impacting userspace.)
> +	 */
> +	if (!switch_to_kernel_context(engine))
> +		return -EBUSY;
> +
> +	GEM_TRACE("%s\n", engine->name);
> +
> +	intel_engine_disarm_breadcrumbs(engine);
> +
> +	/* Must be reset upon idling, or we may miss the busy wakeup. */
> +	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> +
> +	if (engine->park)
> +		engine->park(engine);
> +
> +	if (engine->pinned_default_state) {
> +		i915_gem_object_unpin_map(engine->default_state);
> +		engine->pinned_default_state = NULL;
> +	}
> +
> +	engine->execlists.no_priolist = false;
> +
> +	intel_gt_pm_put(engine->i915);
> +	return 0;
> +}
> +
> +void intel_engine_pm_put(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_put(engine->i915, &engine->wakeref, intel_engine_park);
> +}
> +
> +void intel_engine_init__pm(struct intel_engine_cs *engine)
> +{
> +	intel_wakeref_init(&engine->wakeref);
> +}
> +
> +int intel_engines_resume(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err = 0;
> +
> +	/*
> +	 * After resume, we may need to poke into the pinned kernel
> +	 * contexts to paper over any damage caused by the sudden suspend.
> +	 * Only the kernel contexts should remain pinned over suspend,
> +	 * allowing us to fixup the user contexts on their first pin.
> +	 */
> +	intel_gt_pm_get(i915);
> +	for_each_engine(engine, i915, id) {
> +		intel_engine_pm_get(engine);
> +		engine->serial++; /* kernel context lost */
> +		err = engine->resume(engine);
> +		intel_engine_pm_put(engine);
> +		if (err) {
> +			dev_err(i915->drm.dev,
> +				"Failed to restart %s (%d)\n",
> +				engine->name, err);
> +			break;
> +		}
> +	}
> +	intel_gt_pm_put(i915);
> +
> +	return err;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> new file mode 100644
> index 000000000000..143ac90ba117
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
> @@ -0,0 +1,20 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_ENGINE_PM_H
> +#define INTEL_ENGINE_PM_H
> +
> +struct drm_i915_private;
> +struct intel_engine_cs;
> +
> +void intel_engine_pm_get(struct intel_engine_cs *engine);
> +void intel_engine_pm_put(struct intel_engine_cs *engine);
> +
> +void intel_engine_init__pm(struct intel_engine_cs *engine);
> +
> +int intel_engines_resume(struct drm_i915_private *i915);
> +
> +#endif /* INTEL_ENGINE_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 3adf58da6d2c..d972c339309c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -20,6 +20,7 @@
>   #include "i915_selftest.h"
>   #include "i915_timeline_types.h"
>   #include "intel_sseu.h"
> +#include "intel_wakeref.h"
>   #include "intel_workarounds_types.h"
>   
>   #define I915_MAX_SLICES	3
> @@ -287,6 +288,10 @@ struct intel_engine_cs {
>   	struct intel_context *kernel_context; /* pinned */
>   	struct intel_context *preempt_context; /* pinned; optional */
>   
> +	unsigned long serial;
> +
> +	unsigned long wakeref_serial;
> +	struct intel_wakeref wakeref;
>   	struct drm_i915_gem_object *default_state;
>   	void *pinned_default_state;
>   
> @@ -359,7 +364,7 @@ struct intel_engine_cs {
>   	void		(*irq_enable)(struct intel_engine_cs *engine);
>   	void		(*irq_disable)(struct intel_engine_cs *engine);
>   
> -	int		(*init_hw)(struct intel_engine_cs *engine);
> +	int		(*resume)(struct intel_engine_cs *engine);
>   
>   	struct {
>   		void (*prepare)(struct intel_engine_cs *engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> new file mode 100644
> index 000000000000..ae7155f0e063
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -0,0 +1,143 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_gt_pm.h"
> +#include "intel_pm.h"
> +#include "intel_wakeref.h"
> +
> +static void pm_notify(struct drm_i915_private *i915, int state)
> +{
> +	blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
> +}
> +
> +static int intel_gt_unpark(struct intel_wakeref *wf)
> +{
> +	struct drm_i915_private *i915 =
> +		container_of(wf, typeof(*i915), gt.wakeref);
> +
> +	GEM_TRACE("\n");
> +
> +	/*
> +	 * It seems that the DMC likes to transition between the DC states a lot
> +	 * when there are no connected displays (no active power domains) during
> +	 * command submission.
> +	 *
> +	 * This activity has negative impact on the performance of the chip with
> +	 * huge latencies observed in the interrupt handler and elsewhere.
> +	 *
> +	 * Work around it by grabbing a GT IRQ power domain whilst there is any
> +	 * GT activity, preventing any DC state transitions.
> +	 */
> +	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> +	GEM_BUG_ON(!i915->gt.awake);
> +
> +	intel_enable_gt_powersave(i915);
> +
> +	i915_update_gfx_val(i915);
> +	if (INTEL_GEN(i915) >= 6)
> +		gen6_rps_busy(i915);
> +
> +	i915_pmu_gt_unparked(i915);
> +
> +	i915_queue_hangcheck(i915);
> +
> +	pm_notify(i915, INTEL_GT_UNPARK);
> +
> +	return 0;
> +}
> +
> +void intel_gt_pm_get(struct drm_i915_private *i915)
> +{
> +	intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark);
> +}
> +
> +static int intel_gt_park(struct intel_wakeref *wf)
> +{
> +	struct drm_i915_private *i915 =
> +		container_of(wf, typeof(*i915), gt.wakeref);
> +	intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
> +
> +	GEM_TRACE("\n");
> +
> +	pm_notify(i915, INTEL_GT_PARK);
> +
> +	i915_pmu_gt_parked(i915);
> +	if (INTEL_GEN(i915) >= 6)
> +		gen6_rps_idle(i915);
> +
> +	GEM_BUG_ON(!wakeref);
> +	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
> +
> +	return 0;
> +}
> +
> +void intel_gt_pm_put(struct drm_i915_private *i915)
> +{
> +	intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park);
> +}
> +
> +void intel_gt_pm_init(struct drm_i915_private *i915)
> +{
> +	intel_wakeref_init(&i915->gt.wakeref);
> +	BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications);
> +}
> +
> +static bool reset_engines(struct drm_i915_private *i915)
> +{
> +	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
> +		return false;
> +
> +	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
> +}
> +
> +/**
> + * intel_gt_sanitize: called after the GPU has lost power
> + * @i915: the i915 device
> + * @force: ignore a failed reset and sanitize engine state anyway
> + *
> + * Anytime we reset the GPU, either with an explicit GPU reset or through a
> + * PCI power cycle, the GPU loses state and we must reset our state tracking
> + * to match. Note that calling intel_gt_sanitize() if the GPU has not
> + * been reset results in much confusion!
> + */
> +void intel_gt_sanitize(struct drm_i915_private *i915, bool force)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	GEM_TRACE("\n");
> +
> +	if (!reset_engines(i915) && !force)
> +		return;
> +
> +	for_each_engine(engine, i915, id)
> +		intel_engine_reset(engine, false);
> +}
> +
> +void intel_gt_resume(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	/*
> +	 * After resume, we may need to poke into the pinned kernel
> +	 * contexts to paper over any damage caused by the sudden suspend.
> +	 * Only the kernel contexts should remain pinned over suspend,
> +	 * allowing us to fixup the user contexts on their first pin.
> +	 */
> +	for_each_engine(engine, i915, id) {
> +		struct intel_context *ce;
> +
> +		ce = engine->kernel_context;
> +		if (ce)
> +			ce->ops->reset(ce);
> +
> +		ce = engine->preempt_context;
> +		if (ce)
> +			ce->ops->reset(ce);
> +	}
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> new file mode 100644
> index 000000000000..7dd1130a19a4
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
> @@ -0,0 +1,27 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_GT_PM_H
> +#define INTEL_GT_PM_H
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_private;
> +
> +enum {
> +	INTEL_GT_UNPARK,
> +	INTEL_GT_PARK,
> +};
> +
> +void intel_gt_pm_get(struct drm_i915_private *i915);
> +void intel_gt_pm_put(struct drm_i915_private *i915);
> +
> +void intel_gt_pm_init(struct drm_i915_private *i915);
> +
> +void intel_gt_sanitize(struct drm_i915_private *i915, bool force);
> +void intel_gt_resume(struct drm_i915_private *i915);
> +
> +#endif /* INTEL_GT_PM_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index 3053a706a561..e5eaa06fe74d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -256,6 +256,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
>   	unsigned int hung = 0, stuck = 0, wedged = 0;
> +	intel_wakeref_t wakeref;
>   
>   	if (!i915_modparams.enable_hangcheck)
>   		return;
> @@ -266,6 +267,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	if (i915_terminally_wedged(dev_priv))
>   		return;
>   
> +	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
> +	if (!wakeref)
> +		return;
> +
>   	/* As enabling the GPU requires fairly extensive mmio access,
>   	 * periodically arm the mmio checker to see if we are triggering
>   	 * any invalid access.
> @@ -313,6 +318,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   	if (hung)
>   		hangcheck_declare_hang(dev_priv, hung, stuck);
>   
> +	intel_runtime_pm_put(dev_priv, wakeref);
> +
>   	/* Reset timer in case GPU hangs without another request being added */
>   	i915_queue_hangcheck(dev_priv);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index edec7f183688..d17c08e26935 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1789,7 +1789,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
>   	return unexpected;
>   }
>   
> -static int gen8_init_common_ring(struct intel_engine_cs *engine)
> +static int execlists_resume(struct intel_engine_cs *engine)
>   {
>   	intel_engine_apply_workarounds(engine);
>   	intel_engine_apply_whitelist(engine);
> @@ -1822,7 +1822,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
>   	 * completed the reset in i915_gem_reset_finish(). If a request
>   	 * is completed by one engine, it may then queue a request
>   	 * to a second via its execlists->tasklet *just* as we are
> -	 * calling engine->init_hw() and also writing the ELSP.
> +	 * calling engine->resume() and also writing the ELSP.
>   	 * Turning off the execlists->tasklet until the reset is over
>   	 * prevents the race.
>   	 */
> @@ -2391,7 +2391,7 @@ static void
>   logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   {
>   	/* Default vfuncs which can be overriden by each engine. */
> -	engine->init_hw = gen8_init_common_ring;
> +	engine->resume = execlists_resume;
>   
>   	engine->reset.prepare = execlists_reset_prepare;
>   	engine->reset.reset = execlists_reset;
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 996164d07397..af85723c7e2f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -9,6 +9,8 @@
>   
>   #include "i915_drv.h"
>   #include "i915_gpu_error.h"
> +#include "intel_engine_pm.h"
> +#include "intel_gt_pm.h"
>   #include "intel_reset.h"
>   
>   #include "intel_guc.h"
> @@ -683,6 +685,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
>   	 * written to the powercontext is undefined and so we may lose
>   	 * GPU state upon resume, i.e. fail to restart after a reset.
>   	 */
> +	intel_engine_pm_get(engine);
>   	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
>   	engine->reset.prepare(engine);
>   }
> @@ -718,6 +721,7 @@ static void reset_prepare(struct drm_i915_private *i915)
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
>   
> +	intel_gt_pm_get(i915);
>   	for_each_engine(engine, i915, id)
>   		reset_prepare_engine(engine);
>   
> @@ -755,48 +759,10 @@ static int gt_reset(struct drm_i915_private *i915,
>   static void reset_finish_engine(struct intel_engine_cs *engine)
>   {
>   	engine->reset.finish(engine);
> +	intel_engine_pm_put(engine);
>   	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
>   }
>   
> -struct i915_gpu_restart {
> -	struct work_struct work;
> -	struct drm_i915_private *i915;
> -};
> -
> -static void restart_work(struct work_struct *work)
> -{
> -	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
> -	struct drm_i915_private *i915 = arg->i915;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -
> -	wakeref = intel_runtime_pm_get(i915);
> -	mutex_lock(&i915->drm.struct_mutex);
> -	WRITE_ONCE(i915->gpu_error.restart, NULL);
> -
> -	for_each_engine(engine, i915, id) {
> -		struct i915_request *rq;
> -
> -		/*
> -		 * Ostensibily, we always want a context loaded for powersaving,
> -		 * so if the engine is idle after the reset, send a request
> -		 * to load our scratch kernel_context.
> -		 */
> -		if (!intel_engine_is_idle(engine))
> -			continue;
> -
> -		rq = i915_request_create(engine->kernel_context);
> -		if (!IS_ERR(rq))
> -			i915_request_add(rq);
> -	}
> -
> -	mutex_unlock(&i915->drm.struct_mutex);
> -	intel_runtime_pm_put(i915, wakeref);
> -
> -	kfree(arg);
> -}
> -
>   static void reset_finish(struct drm_i915_private *i915)
>   {
>   	struct intel_engine_cs *engine;
> @@ -806,29 +772,7 @@ static void reset_finish(struct drm_i915_private *i915)
>   		reset_finish_engine(engine);
>   		intel_engine_signal_breadcrumbs(engine);
>   	}
> -}
> -
> -static void reset_restart(struct drm_i915_private *i915)
> -{
> -	struct i915_gpu_restart *arg;
> -
> -	/*
> -	 * Following the reset, ensure that we always reload context for
> -	 * powersaving, and to correct engine->last_retired_context. Since
> -	 * this requires us to submit a request, queue a worker to do that
> -	 * task for us to evade any locking here.
> -	 */
> -	if (READ_ONCE(i915->gpu_error.restart))
> -		return;
> -
> -	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
> -	if (arg) {
> -		arg->i915 = i915;
> -		INIT_WORK(&arg->work, restart_work);
> -
> -		WRITE_ONCE(i915->gpu_error.restart, arg);
> -		queue_work(i915->wq, &arg->work);
> -	}
> +	intel_gt_pm_put(i915);
>   }
>   
>   static void nop_submit_request(struct i915_request *request)
> @@ -889,6 +833,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
>   	 * in nop_submit_request.
>   	 */
>   	synchronize_rcu_expedited();
> +	set_bit(I915_WEDGED, &error->flags);
>   
>   	/* Mark all executing requests as skipped */
>   	for_each_engine(engine, i915, id)
> @@ -896,9 +841,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
>   
>   	reset_finish(i915);
>   
> -	smp_mb__before_atomic();
> -	set_bit(I915_WEDGED, &error->flags);
> -
>   	GEM_TRACE("end\n");
>   }
>   
> @@ -956,7 +898,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
>   	}
>   	mutex_unlock(&i915->gt.timelines.mutex);
>   
> -	intel_engines_sanitize(i915, false);
> +	intel_gt_sanitize(i915, false);
>   
>   	/*
>   	 * Undo nop_submit_request. We prevent all new i915 requests from
> @@ -1034,7 +976,6 @@ void i915_reset(struct drm_i915_private *i915,
>   	GEM_TRACE("flags=%lx\n", error->flags);
>   
>   	might_sleep();
> -	assert_rpm_wakelock_held(i915);
>   	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
>   
>   	/* Clear any previous failed attempts at recovery. Time to try again. */
> @@ -1087,8 +1028,6 @@ void i915_reset(struct drm_i915_private *i915,
>   
>   finish:
>   	reset_finish(i915);
> -	if (!__i915_wedged(error))
> -		reset_restart(i915);
>   	return;
>   
>   taint:
> @@ -1137,6 +1076,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>   	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
>   	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
>   
> +	if (!intel_wakeref_active(&engine->wakeref))
> +		return 0;
> +
>   	reset_prepare_engine(engine);
>   
>   	if (msg)
> @@ -1168,7 +1110,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>   	 * have been reset to their default values. Follow the init_ring
>   	 * process to program RING_MODE, HWSP and re-enable submission.
>   	 */
> -	ret = engine->init_hw(engine);
> +	ret = engine->resume(engine);
>   	if (ret)
>   		goto out;
>   
> @@ -1425,25 +1367,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915)
>   	return __i915_wedged(error) ? -EIO : 0;
>   }
>   
> -bool i915_reset_flush(struct drm_i915_private *i915)
> -{
> -	int err;
> -
> -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
> -
> -	flush_workqueue(i915->wq);
> -	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	err = i915_gem_wait_for_idle(i915,
> -				     I915_WAIT_LOCKED |
> -				     I915_WAIT_FOR_IDLE_BOOST,
> -				     MAX_SCHEDULE_TIMEOUT);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
> -	return !err;
> -}
> -
>   static void i915_wedge_me(struct work_struct *work)
>   {
>   	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> index 8e662bb43a9b..b52efaab4941 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.h
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> @@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine,
>   		      const char *reason);
>   
>   void i915_reset_request(struct i915_request *rq, bool guilty);
> -bool i915_reset_flush(struct drm_i915_private *i915);
>   
>   int __must_check i915_reset_trylock(struct drm_i915_private *i915);
>   void i915_reset_unlock(struct drm_i915_private *i915, int tag);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index b2bb7d4bfbe3..f164dbe90050 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -637,12 +637,15 @@ static bool stop_ring(struct intel_engine_cs *engine)
>   	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
>   }
>   
> -static int init_ring_common(struct intel_engine_cs *engine)
> +static int xcs_resume(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
>   	struct intel_ring *ring = engine->buffer;
>   	int ret = 0;
>   
> +	GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
> +		  engine->name, ring->head, ring->tail);
> +
>   	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
>   
>   	if (!stop_ring(engine)) {
> @@ -827,12 +830,9 @@ static int intel_rcs_ctx_init(struct i915_request *rq)
>   	return 0;
>   }
>   
> -static int init_render_ring(struct intel_engine_cs *engine)
> +static int rcs_resume(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_private *dev_priv = engine->i915;
> -	int ret = init_ring_common(engine);
> -	if (ret)
> -		return ret;
>   
>   	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
>   	if (IS_GEN_RANGE(dev_priv, 4, 6))
> @@ -875,7 +875,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
>   	if (INTEL_GEN(dev_priv) >= 6)
>   		ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
>   
> -	return 0;
> +	return xcs_resume(engine);
>   }
>   
>   static void cancel_requests(struct intel_engine_cs *engine)
> @@ -2207,7 +2207,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>   
>   	intel_ring_init_irq(dev_priv, engine);
>   
> -	engine->init_hw = init_ring_common;
> +	engine->resume = xcs_resume;
>   	engine->reset.prepare = reset_prepare;
>   	engine->reset.reset = reset_ring;
>   	engine->reset.finish = reset_finish;
> @@ -2269,7 +2269,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>   	if (IS_HASWELL(dev_priv))
>   		engine->emit_bb_start = hsw_emit_bb_start;
>   
> -	engine->init_hw = init_render_ring;
> +	engine->resume = rcs_resume;
>   
>   	ret = intel_init_ring_buffer(engine);
>   	if (ret)
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index bcfeb0c67997..a97a0ab35703 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -24,6 +24,7 @@
>   
>   #include "i915_drv.h"
>   #include "intel_context.h"
> +#include "intel_engine_pm.h"
>   
>   #include "mock_engine.h"
>   #include "selftests/mock_request.h"
> @@ -268,6 +269,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
>   
>   	intel_engine_init_breadcrumbs(&engine->base);
> +	intel_engine_init_execlists(&engine->base);
> +	intel_engine_init__pm(&engine->base);
>   
>   	/* fake hw queue */
>   	spin_lock_init(&engine->hw_lock);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 87c26920212f..6004d6907e9c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -24,6 +24,8 @@
>   
>   #include <linux/kthread.h>
>   
> +#include "intel_engine_pm.h"
> +
>   #include "i915_selftest.h"
>   #include "selftests/i915_random.h"
>   #include "selftests/igt_flush_test.h"
> @@ -479,19 +481,6 @@ static int igt_reset_nop(void *arg)
>   			break;
>   		}
>   
> -		if (!i915_reset_flush(i915)) {
> -			struct drm_printer p =
> -				drm_info_printer(i915->drm.dev);
> -
> -			pr_err("%s failed to idle after reset\n",
> -			       engine->name);
> -			intel_engine_dump(engine, &p,
> -					  "%s\n", engine->name);
> -
> -			err = -EIO;
> -			break;
> -		}
> -
>   		err = igt_flush_test(i915, 0);
>   		if (err)
>   			break;
> @@ -594,19 +583,6 @@ static int igt_reset_nop_engine(void *arg)
>   				err = -EINVAL;
>   				break;
>   			}
> -
> -			if (!i915_reset_flush(i915)) {
> -				struct drm_printer p =
> -					drm_info_printer(i915->drm.dev);
> -
> -				pr_err("%s failed to idle after reset\n",
> -				       engine->name);
> -				intel_engine_dump(engine, &p,
> -						  "%s\n", engine->name);
> -
> -				err = -EIO;
> -				break;
> -			}
>   		} while (time_before(jiffies, end_time));
>   		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>   		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
> @@ -669,6 +645,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>   		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
>   							     engine);
>   
> +		intel_engine_pm_get(engine);
>   		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>   		do {
>   			if (active) {
> @@ -721,21 +698,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>   				err = -EINVAL;
>   				break;
>   			}
> -
> -			if (!i915_reset_flush(i915)) {
> -				struct drm_printer p =
> -					drm_info_printer(i915->drm.dev);
> -
> -				pr_err("%s failed to idle after reset\n",
> -				       engine->name);
> -				intel_engine_dump(engine, &p,
> -						  "%s\n", engine->name);
> -
> -				err = -EIO;
> -				break;
> -			}
>   		} while (time_before(jiffies, end_time));
>   		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
> +		intel_engine_pm_put(engine);
>   
>   		if (err)
>   			break;
> @@ -942,6 +907,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>   			get_task_struct(tsk);
>   		}
>   
> +		intel_engine_pm_get(engine);
>   		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
>   		do {
>   			struct i915_request *rq = NULL;
> @@ -1018,6 +984,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>   			}
>   		} while (time_before(jiffies, end_time));
>   		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
> +		intel_engine_pm_put(engine);
>   		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
>   			engine->name, test_name, count);
>   
> @@ -1069,7 +1036,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
>   		if (err)
>   			break;
>   
> -		err = igt_flush_test(i915, 0);
> +		mutex_lock(&i915->drm.struct_mutex);
> +		err = igt_flush_test(i915, I915_WAIT_LOCKED);
> +		mutex_unlock(&i915->drm.struct_mutex);
>   		if (err)
>   			break;
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 96c6282f3a10..461d91737077 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -71,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   {
>   	const u32 base = engine->mmio_base;
>   	struct drm_i915_gem_object *result;
> -	intel_wakeref_t wakeref;
>   	struct i915_request *rq;
>   	struct i915_vma *vma;
>   	u32 srm, *cs;
> @@ -103,9 +102,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   	if (err)
>   		goto err_obj;
>   
> -	rq = ERR_PTR(-ENODEV);
> -	with_intel_runtime_pm(engine->i915, wakeref)
> -		rq = i915_request_alloc(engine, ctx);
> +	rq = i915_request_alloc(engine, ctx);
>   	if (IS_ERR(rq)) {
>   		err = PTR_ERR(rq);
>   		goto err_pin;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8dcba78fb43b..00d3ff746eb1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2041,8 +2041,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	}
>   
>   	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
> -	seq_printf(m, "GPU busy? %s [%d requests]\n",
> -		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> +	seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
>   	seq_printf(m, "Boosts outstanding? %d\n",
>   		   atomic_read(&rps->num_waiters));
>   	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
> @@ -2061,9 +2060,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   
>   	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
>   
> -	if (INTEL_GEN(dev_priv) >= 6 &&
> -	    rps->enabled &&
> -	    dev_priv->gt.active_requests) {
> +	if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) {
>   		u32 rpup, rpupei;
>   		u32 rpdown, rpdownei;
>   
> @@ -3092,9 +3089,9 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>   
>   	wakeref = intel_runtime_pm_get(dev_priv);
>   
> -	seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake));
> -	seq_printf(m, "Global active requests: %d\n",
> -		   dev_priv->gt.active_requests);
> +	seq_printf(m, "GT awake? %s [%d]\n",
> +		   yesno(dev_priv->gt.awake),
> +		   atomic_read(&dev_priv->gt.wakeref.count));
>   	seq_printf(m, "CS timestamp frequency: %u kHz\n",
>   		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
>   
> @@ -3940,8 +3937,7 @@ i915_drop_caches_set(void *data, u64 val)
>   
>   	if (val & DROP_IDLE) {
>   		do {
> -			if (READ_ONCE(i915->gt.active_requests))
> -				flush_delayed_work(&i915->gem.retire_work);
> +			flush_delayed_work(&i915->gem.retire_work);
>   			drain_delayed_work(&i915->gem.idle_work);
>   		} while (READ_ONCE(i915->gt.awake));
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 98b997526daa..c8cb70d4fe91 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -47,8 +47,9 @@
>   #include <drm/drm_probe_helper.h>
>   #include <drm/i915_drm.h>
>   
> -#include "gt/intel_workarounds.h"
> +#include "gt/intel_gt_pm.h"
>   #include "gt/intel_reset.h"
> +#include "gt/intel_workarounds.h"
>   
>   #include "i915_drv.h"
>   #include "i915_pmu.h"
> @@ -2323,7 +2324,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
>   
>   	intel_power_domains_resume(dev_priv);
>   
> -	intel_engines_sanitize(dev_priv, true);
> +	intel_gt_sanitize(dev_priv, true);
>   
>   	enable_rpm_wakeref_asserts(dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cbae9be052e0..e5ae6c36e959 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2006,10 +2006,10 @@ struct drm_i915_private {
>   			struct list_head hwsp_free_list;
>   		} timelines;
>   
> -		intel_engine_mask_t active_engines;
>   		struct list_head active_rings;
>   		struct list_head closed_vma;
> -		u32 active_requests;
> +
> +		struct intel_wakeref wakeref;
>   
>   		/**
>   		 * Is the GPU currently considered idle, or busy executing
> @@ -2020,12 +2020,16 @@ struct drm_i915_private {
>   		 */
>   		intel_wakeref_t awake;
>   
> +		struct blocking_notifier_head pm_notifications;
> +
>   		ktime_t last_init_time;
>   
>   		struct i915_vma *scratch;
>   	} gt;
>   
>   	struct {
> +		struct notifier_block pm_notifier;
> +
>   		/**
>   		 * We leave the user IRQ off as much as possible,
>   		 * but this means that requests will finish and never
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 74b99126830b..d0211271f103 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -39,6 +39,8 @@
>   #include <linux/dma-buf.h>
>   #include <linux/mman.h>
>   
> +#include "gt/intel_engine_pm.h"
> +#include "gt/intel_gt_pm.h"
>   #include "gt/intel_mocs.h"
>   #include "gt/intel_reset.h"
>   #include "gt/intel_workarounds.h"
> @@ -2911,9 +2913,6 @@ wait_for_timelines(struct drm_i915_private *i915,
>   	struct i915_gt_timelines *gt = &i915->gt.timelines;
>   	struct i915_timeline *tl;
>   
> -	if (!READ_ONCE(i915->gt.active_requests))
> -		return timeout;
> -
>   	mutex_lock(&gt->mutex);
>   	list_for_each_entry(tl, &gt->active_list, link) {
>   		struct i915_request *rq;
> @@ -2953,9 +2952,10 @@ wait_for_timelines(struct drm_i915_private *i915,
>   int i915_gem_wait_for_idle(struct drm_i915_private *i915,
>   			   unsigned int flags, long timeout)
>   {
> -	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
> +	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
>   		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
> -		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
> +		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
> +		  yesno(i915->gt.awake));
>   
>   	/* If the device is asleep, we have no requests outstanding */
>   	if (!READ_ONCE(i915->gt.awake))
> @@ -4177,7 +4177,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>   	 * it may impact the display and we are uncertain about the stability
>   	 * of the reset, so this could be applied to even earlier gen.
>   	 */
> -	intel_engines_sanitize(i915, false);
> +	intel_gt_sanitize(i915, false);
>   
>   	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
>   	intel_runtime_pm_put(i915, wakeref);
> @@ -4235,27 +4235,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
>   	}
>   }
>   
> -static int __i915_gem_restart_engines(void *data)
> -{
> -	struct drm_i915_private *i915 = data;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	int err;
> -
> -	for_each_engine(engine, i915, id) {
> -		err = engine->init_hw(engine);
> -		if (err) {
> -			DRM_ERROR("Failed to restart %s (%d)\n",
> -				  engine->name, err);
> -			return err;
> -		}
> -	}
> -
> -	intel_engines_set_scheduler_caps(i915);
> -
> -	return 0;
> -}
> -
>   int i915_gem_init_hw(struct drm_i915_private *dev_priv)
>   {
>   	int ret;
> @@ -4314,12 +4293,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
>   	intel_mocs_init_l3cc_table(dev_priv);
>   
>   	/* Only when the HW is re-initialised, can we replay the requests */
> -	ret = __i915_gem_restart_engines(dev_priv);
> +	ret = intel_engines_resume(dev_priv);
>   	if (ret)
>   		goto cleanup_uc;
>   
>   	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
>   
> +	intel_engines_set_scheduler_caps(dev_priv);
>   	return 0;
>   
>   cleanup_uc:
> @@ -4625,6 +4605,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   err_init_hw:
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> +	i915_gem_set_wedged(dev_priv);
>   	i915_gem_suspend(dev_priv);
>   	i915_gem_suspend_late(dev_priv);
>   
> @@ -4686,6 +4667,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   
>   void i915_gem_fini(struct drm_i915_private *dev_priv)
>   {
> +	GEM_BUG_ON(dev_priv->gt.awake);
> +
>   	i915_gem_suspend_late(dev_priv);
>   	intel_disable_gt_powersave(dev_priv);
>   
> @@ -4780,6 +4763,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>   {
>   	int err;
>   
> +	intel_gt_pm_init(dev_priv);
> +
>   	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
>   	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 9074eb1e843f..67f8a4a807a0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -75,9 +75,6 @@ struct drm_i915_private;
>   
>   #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
>   
> -void i915_gem_park(struct drm_i915_private *i915);
> -void i915_gem_unpark(struct drm_i915_private *i915);
> -
>   static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
>   {
>   	if (!atomic_fetch_inc(&t->count))
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3eb1a664b5fa..76ed74e75d82 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -824,26 +824,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   }
>   
> -static struct i915_request *
> -last_request_on_engine(struct i915_timeline *timeline,
> -		       struct intel_engine_cs *engine)
> -{
> -	struct i915_request *rq;
> -
> -	GEM_BUG_ON(timeline == &engine->timeline);
> -
> -	rq = i915_active_request_raw(&timeline->last_request,
> -				     &engine->i915->drm.struct_mutex);
> -	if (rq && rq->engine->mask & engine->mask) {
> -		GEM_TRACE("last request on engine %s: %llx:%llu\n",
> -			  engine->name, rq->fence.context, rq->fence.seqno);
> -		GEM_BUG_ON(rq->timeline != timeline);
> -		return rq;
> -	}
> -
> -	return NULL;
> -}
> -
>   struct context_barrier_task {
>   	struct i915_active base;
>   	void (*task)(void *data);
> @@ -871,7 +851,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	struct drm_i915_private *i915 = ctx->i915;
>   	struct context_barrier_task *cb;
>   	struct intel_context *ce, *next;
> -	intel_wakeref_t wakeref;
>   	int err = 0;
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -884,7 +863,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	i915_active_init(i915, &cb->base, cb_retire);
>   	i915_active_acquire(&cb->base);
>   
> -	wakeref = intel_runtime_pm_get(i915);
>   	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
>   		struct intel_engine_cs *engine = ce->engine;
>   		struct i915_request *rq;
> @@ -914,7 +892,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   		if (err)
>   			break;
>   	}
> -	intel_runtime_pm_put(i915, wakeref);
>   
>   	cb->task = err ? NULL : task; /* caller needs to unwind instead */
>   	cb->data = data;
> @@ -924,54 +901,6 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	return err;
>   }
>   
> -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
> -				      intel_engine_mask_t mask)
> -{
> -	struct intel_engine_cs *engine;
> -
> -	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
> -
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(!i915->kernel_context);
> -
> -	/* Inoperable, so presume the GPU is safely pointing into the void! */
> -	if (i915_terminally_wedged(i915))
> -		return 0;
> -
> -	for_each_engine_masked(engine, i915, mask, mask) {
> -		struct intel_ring *ring;
> -		struct i915_request *rq;
> -
> -		rq = i915_request_create(engine->kernel_context);
> -		if (IS_ERR(rq))
> -			return PTR_ERR(rq);
> -
> -		/* Queue this switch after all other activity */
> -		list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
> -			struct i915_request *prev;
> -
> -			prev = last_request_on_engine(ring->timeline, engine);
> -			if (!prev)
> -				continue;
> -
> -			if (prev->gem_context == i915->kernel_context)
> -				continue;
> -
> -			GEM_TRACE("add barrier on %s for %llx:%lld\n",
> -				  engine->name,
> -				  prev->fence.context,
> -				  prev->fence.seqno);
> -			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
> -							 &prev->submit,
> -							 I915_FENCE_GFP);
> -		}
> -
> -		i915_request_add(rq);
> -	}
> -
> -	return 0;
> -}
> -
>   static int get_ppgtt(struct drm_i915_file_private *file_priv,
>   		     struct i915_gem_context *ctx,
>   		     struct drm_i915_gem_context_param *args)
> @@ -1169,9 +1098,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
>   static int
>   gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   {
> -	struct drm_i915_private *i915 = ce->engine->i915;
>   	struct i915_request *rq;
> -	intel_wakeref_t wakeref;
>   	int ret;
>   
>   	lockdep_assert_held(&ce->pin_mutex);
> @@ -1185,14 +1112,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   	if (!intel_context_is_pinned(ce))
>   		return 0;
>   
> -	/* Submitting requests etc needs the hw awake. */
> -	wakeref = intel_runtime_pm_get(i915);
> -
>   	rq = i915_request_create(ce->engine->kernel_context);
> -	if (IS_ERR(rq)) {
> -		ret = PTR_ERR(rq);
> -		goto out_put;
> -	}
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
>   
>   	/* Queue this switch after all other activity by this context. */
>   	ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
> @@ -1216,9 +1138,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
>   
>   out_add:
>   	i915_request_add(rq);
> -out_put:
> -	intel_runtime_pm_put(i915, wakeref);
> -
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index cec278ab04e2..5a8e080499fb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -141,10 +141,6 @@ int i915_gem_context_open(struct drm_i915_private *i915,
>   			  struct drm_file *file);
>   void i915_gem_context_close(struct drm_file *file);
>   
> -int i915_switch_context(struct i915_request *rq);
> -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
> -				      intel_engine_mask_t engine_mask);
> -
>   void i915_gem_context_release(struct kref *ctx_ref);
>   struct i915_gem_context *
>   i915_gem_context_create_gvt(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 060f5903544a..0bdb3e072ba5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -36,15 +36,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
>   	bool fail_if_busy:1;
>   } igt_evict_ctl;)
>   
> -static bool ggtt_is_idle(struct drm_i915_private *i915)
> -{
> -	return !i915->gt.active_requests;
> -}
> -
>   static int ggtt_flush(struct drm_i915_private *i915)
>   {
> -	int err;
> -
>   	/*
>   	 * Not everything in the GGTT is tracked via vma (otherwise we
>   	 * could evict as required with minimal stalling) so we are forced
> @@ -52,19 +45,10 @@ static int ggtt_flush(struct drm_i915_private *i915)
>   	 * the hopes that we can then remove contexts and the like only
>   	 * bound by their active reference.
>   	 */
> -	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
> -	if (err)
> -		return err;
> -
> -	err = i915_gem_wait_for_idle(i915,
> -				     I915_WAIT_INTERRUPTIBLE |
> -				     I915_WAIT_LOCKED,
> -				     MAX_SCHEDULE_TIMEOUT);
> -	if (err)
> -		return err;
> -
> -	GEM_BUG_ON(!ggtt_is_idle(i915));
> -	return 0;
> +	return i915_gem_wait_for_idle(i915,
> +				      I915_WAIT_INTERRUPTIBLE |
> +				      I915_WAIT_LOCKED,
> +				      MAX_SCHEDULE_TIMEOUT);
>   }
>   
>   static bool
> @@ -222,24 +206,17 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	 * us a termination condition, when the last retired context is
>   	 * the kernel's there is no more we can evict.
>   	 */
> -	if (!ggtt_is_idle(dev_priv)) {
> -		if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
> -			return -EBUSY;
> +	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
> +		return -EBUSY;
>   
> -		ret = ggtt_flush(dev_priv);
> -		if (ret)
> -			return ret;
> +	ret = ggtt_flush(dev_priv);
> +	if (ret)
> +		return ret;
>   
> -		cond_resched();
> -		goto search_again;
> -	}
> +	cond_resched();
>   
> -	/*
> -	 * If we still have pending pageflip completions, drop
> -	 * back to userspace to give our workqueues time to
> -	 * acquire our locks and unpin the old scanouts.
> -	 */
> -	return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC;
> +	flags |= PIN_NONBLOCK;
> +	goto search_again;
>   
>   found:
>   	/* drm_mm doesn't allow any other operations while
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> index 9fb0e8d567a2..3554d55dae35 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> @@ -4,136 +4,63 @@
>    * Copyright © 2019 Intel Corporation
>    */
>   
> +#include "gt/intel_gt_pm.h"
> +
>   #include "i915_drv.h"
>   #include "i915_gem_pm.h"
>   #include "i915_globals.h"
> -#include "intel_pm.h"
>   
> -static void __i915_gem_park(struct drm_i915_private *i915)
> +static void i915_gem_park(struct drm_i915_private *i915)
>   {
> -	intel_wakeref_t wakeref;
> -
> -	GEM_TRACE("\n");
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(i915->gt.active_requests);
> -	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
> -
> -	if (!i915->gt.awake)
> -		return;
> -
> -	/*
> -	 * Be paranoid and flush a concurrent interrupt to make sure
> -	 * we don't reactivate any irq tasklets after parking.
> -	 *
> -	 * FIXME: Note that even though we have waited for execlists to be idle,
> -	 * there may still be an in-flight interrupt even though the CSB
> -	 * is now empty. synchronize_irq() makes sure that a residual interrupt
> -	 * is completed before we continue, but it doesn't prevent the HW from
> -	 * raising a spurious interrupt later. To complete the shield we should
> -	 * coordinate disabling the CS irq with flushing the interrupts.
> -	 */
> -	synchronize_irq(i915->drm.irq);
> -
> -	intel_engines_park(i915);
> -	i915_timelines_park(i915);
> -
> -	i915_pmu_gt_parked(i915);
> -	i915_vma_parked(i915);
> -
> -	wakeref = fetch_and_zero(&i915->gt.awake);
> -	GEM_BUG_ON(!wakeref);
> -
> -	if (INTEL_GEN(i915) >= 6)
> -		gen6_rps_idle(i915);
> -
> -	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
> -
> -	i915_globals_park();
> -}
> -
> -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
> -					  unsigned long mask)
> -{
> -	bool result = true;
> -
> -	/*
> -	 * Even if we fail to switch, give whatever is running a small chance
> -	 * to save itself before we report the failure. Yes, this may be a
> -	 * false positive due to e.g. ENOMEM, caveat emptor!
> -	 */
> -	if (i915_gem_switch_to_kernel_context(i915, mask))
> -		result = false;
>   
> -	if (i915_gem_wait_for_idle(i915,
> -				   I915_WAIT_LOCKED |
> -				   I915_WAIT_FOR_IDLE_BOOST,
> -				   I915_GEM_IDLE_TIMEOUT))
> -		result = false;
> +	for_each_engine(engine, i915, id) {
> +		/*
> +		 * We are committed now to parking the engines, make sure there
> +		 * will be no more interrupts arriving later and the engines
> +		 * are truly idle.
> +		 */
> +		if (wait_for(intel_engine_is_idle(engine), 10)) {
> +			struct drm_printer p = drm_debug_printer(__func__);
>   
> -	if (!result) {
> -		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
>   			dev_err(i915->drm.dev,
> -				"Failed to idle engines, declaring wedged!\n");
> -			GEM_TRACE_DUMP();
> +				"%s is not idle before parking\n",
> +				engine->name);
> +			intel_engine_dump(engine, &p, NULL);
>   		}
> +		tasklet_kill(&engine->execlists.tasklet);
>   
> -		/* Forcibly cancel outstanding work and leave the gpu quiet. */
> -		i915_gem_set_wedged(i915);
> +		i915_gem_batch_pool_fini(&engine->batch_pool);
>   	}
>   
> -	i915_retire_requests(i915); /* ensure we flush after wedging */
> -	return result;
> +	i915_timelines_park(i915);
> +	i915_vma_parked(i915);
> +
> +	i915_globals_park();
>   }
>   
>   static void idle_work_handler(struct work_struct *work)
>   {
>   	struct drm_i915_private *i915 =
>   		container_of(work, typeof(*i915), gem.idle_work.work);
> -	bool rearm_hangcheck;
> -
> -	if (!READ_ONCE(i915->gt.awake))
> -		return;
> -
> -	if (READ_ONCE(i915->gt.active_requests))
> -		return;
> -
> -	rearm_hangcheck =
> -		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>   
>   	if (!mutex_trylock(&i915->drm.struct_mutex)) {
>   		/* Currently busy, come back later */
>   		mod_delayed_work(i915->wq,
>   				 &i915->gem.idle_work,
>   				 msecs_to_jiffies(50));
> -		goto out_rearm;
> +		return;
>   	}
>   
> -	/*
> -	 * Flush out the last user context, leaving only the pinned
> -	 * kernel context resident. Should anything unfortunate happen
> -	 * while we are idle (such as the GPU being power cycled), no users
> -	 * will be harmed.
> -	 */
> -	if (!work_pending(&i915->gem.idle_work.work) &&
> -	    !i915->gt.active_requests) {
> -		++i915->gt.active_requests; /* don't requeue idle */
> -
> -		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
> -
> -		if (!--i915->gt.active_requests) {
> -			__i915_gem_park(i915);
> -			rearm_hangcheck = false;
> -		}
> -	}
> +	intel_wakeref_lock(&i915->gt.wakeref);
> +	if (!intel_wakeref_active(&i915->gt.wakeref))
> +		i915_gem_park(i915);
> +	intel_wakeref_unlock(&i915->gt.wakeref);
>   
>   	mutex_unlock(&i915->drm.struct_mutex);
> -
> -out_rearm:
> -	if (rearm_hangcheck) {
> -		GEM_BUG_ON(!i915->gt.awake);
> -		i915_queue_hangcheck(i915);
> -	}
>   }
>   
>   static void retire_work_handler(struct work_struct *work)
> @@ -147,97 +74,76 @@ static void retire_work_handler(struct work_struct *work)
>   		mutex_unlock(&i915->drm.struct_mutex);
>   	}
>   
> -	/*
> -	 * Keep the retire handler running until we are finally idle.
> -	 * We do not need to do this test under locking as in the worst-case
> -	 * we queue the retire worker once too often.
> -	 */
> -	if (READ_ONCE(i915->gt.awake))
> +	if (intel_wakeref_active(&i915->gt.wakeref))
>   		queue_delayed_work(i915->wq,
>   				   &i915->gem.retire_work,
>   				   round_jiffies_up_relative(HZ));
>   }
>   
> -void i915_gem_park(struct drm_i915_private *i915)
> +static int pm_notifier(struct notifier_block *nb,
> +		       unsigned long action,
> +		       void *data)
>   {
> -	GEM_TRACE("\n");
> +	struct drm_i915_private *i915 =
> +		container_of(nb, typeof(*i915), gem.pm_notifier);
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(i915->gt.active_requests);
> +	switch (action) {
> +	case INTEL_GT_UNPARK:
> +		i915_globals_unpark();
> +		queue_delayed_work(i915->wq,
> +				   &i915->gem.retire_work,
> +				   round_jiffies_up_relative(HZ));
> +		break;
>   
> -	if (!i915->gt.awake)
> -		return;
> +	case INTEL_GT_PARK:
> +		mod_delayed_work(i915->wq,
> +				 &i915->gem.idle_work,
> +				 msecs_to_jiffies(100));
> +		break;
> +	}
>   
> -	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
> -	mod_delayed_work(i915->wq, &i915->gem.idle_work, msecs_to_jiffies(100));
> +	return NOTIFY_OK;
>   }
>   
> -void i915_gem_unpark(struct drm_i915_private *i915)
> +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
>   {
> -	GEM_TRACE("\n");
> -
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -	GEM_BUG_ON(!i915->gt.active_requests);
> -	assert_rpm_wakelock_held(i915);
> -
> -	if (i915->gt.awake)
> -		return;
> -
> -	/*
> -	 * It seems that the DMC likes to transition between the DC states a lot
> -	 * when there are no connected displays (no active power domains) during
> -	 * command submission.
> -	 *
> -	 * This activity has negative impact on the performance of the chip with
> -	 * huge latencies observed in the interrupt handler and elsewhere.
> -	 *
> -	 * Work around it by grabbing a GT IRQ power domain whilst there is any
> -	 * GT activity, preventing any DC state transitions.
> -	 */
> -	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> -	GEM_BUG_ON(!i915->gt.awake);
> -
> -	i915_globals_unpark();
> -
> -	intel_enable_gt_powersave(i915);
> -	i915_update_gfx_val(i915);
> -	if (INTEL_GEN(i915) >= 6)
> -		gen6_rps_busy(i915);
> -	i915_pmu_gt_unparked(i915);
> -
> -	intel_engines_unpark(i915);
> +	bool result = true;
>   
> -	i915_queue_hangcheck(i915);
> +	do {
> +		if (i915_gem_wait_for_idle(i915,
> +					   I915_WAIT_LOCKED |
> +					   I915_WAIT_FOR_IDLE_BOOST,
> +					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> +			/* XXX hide warning from gem_eio */
> +			if (i915_modparams.reset) {
> +				dev_err(i915->drm.dev,
> +					"Failed to idle engines, declaring wedged!\n");
> +				GEM_TRACE_DUMP();
> +			}
> +
> +			/*
> +			 * Forcibly cancel outstanding work and leave
> +			 * the gpu quiet.
> +			 */
> +			i915_gem_set_wedged(i915);
> +			result = false;
> +		}
> +	} while (i915_retire_requests(i915) && result);
>   
> -	queue_delayed_work(i915->wq,
> -			   &i915->gem.retire_work,
> -			   round_jiffies_up_relative(HZ));
> +	GEM_BUG_ON(i915->gt.awake);
> +	return result;
>   }
>   
>   bool i915_gem_load_power_context(struct drm_i915_private *i915)
>   {
> -	/* Force loading the kernel context on all engines */
> -	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
> -		return false;
> -
> -	/*
> -	 * Immediately park the GPU so that we enable powersaving and
> -	 * treat it as idle. The next time we issue a request, we will
> -	 * unpark and start using the engine->pinned_default_state, otherwise
> -	 * it is in limbo and an early reset may fail.
> -	 */
> -	__i915_gem_park(i915);
> -
> -	return true;
> +	return switch_to_kernel_context_sync(i915);
>   }
>   
>   void i915_gem_suspend(struct drm_i915_private *i915)
>   {
> -	intel_wakeref_t wakeref;
> -
>   	GEM_TRACE("\n");
>   
> -	wakeref = intel_runtime_pm_get(i915);
> +	flush_workqueue(i915->wq);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> @@ -250,10 +156,16 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 * state. Fortunately, the kernel_context is disposable and we do
>   	 * not rely on its state.
>   	 */
> -	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
> +	switch_to_kernel_context_sync(i915);
>   
>   	mutex_unlock(&i915->drm.struct_mutex);
> -	i915_reset_flush(i915);
> +
> +	/*
> +	 * Assert that we successfully flushed all the work and
> +	 * reset the GPU back to its idle, low power state.
> +	 */
> +	GEM_BUG_ON(i915->gt.awake);
> +	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>   
>   	drain_delayed_work(&i915->gem.retire_work);
>   
> @@ -263,17 +175,9 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 */
>   	drain_delayed_work(&i915->gem.idle_work);
>   
> -	flush_workqueue(i915->wq);
> -
> -	/*
> -	 * Assert that we successfully flushed all the work and
> -	 * reset the GPU back to its idle, low power state.
> -	 */
> -	GEM_BUG_ON(i915->gt.awake);
> +	i915_gem_drain_freed_objects(i915);
>   
>   	intel_uc_suspend(i915);
> -
> -	intel_runtime_pm_put(i915, wakeref);
>   }
>   
>   void i915_gem_suspend_late(struct drm_i915_private *i915)
> @@ -362,4 +266,8 @@ void i915_gem_init__pm(struct drm_i915_private *i915)
>   {
>   	INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
>   	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
> +
> +	i915->gem.pm_notifier.notifier_call = pm_notifier;
> +	blocking_notifier_chain_register(&i915->gt.pm_notifications,
> +					 &i915->gem.pm_notifier);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h
> index 52f65e3f06b5..6f7d5d11ac3b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.h
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.h
> @@ -17,9 +17,6 @@ void i915_gem_init__pm(struct drm_i915_private *i915);
>   bool i915_gem_load_power_context(struct drm_i915_private *i915);
>   void i915_gem_resume(struct drm_i915_private *i915);
>   
> -void i915_gem_unpark(struct drm_i915_private *i915);
> -void i915_gem_park(struct drm_i915_private *i915);
> -
>   void i915_gem_idle_work_handler(struct work_struct *work);
>   
>   void i915_gem_suspend(struct drm_i915_private *i915);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index b419d0f59275..2ecd0c6a1c94 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -179,8 +179,6 @@ struct i915_gpu_state {
>   	struct scatterlist *sgl, *fit;
>   };
>   
> -struct i915_gpu_restart;
> -
>   struct i915_gpu_error {
>   	/* For hangcheck timer */
>   #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
> @@ -241,8 +239,6 @@ struct i915_gpu_error {
>   	wait_queue_head_t reset_queue;
>   
>   	struct srcu_struct reset_backoff_srcu;
> -
> -	struct i915_gpu_restart *restart;
>   };
>   
>   struct drm_i915_error_state_buf {
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 672c9ea6c24f..d116b5e69826 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -431,6 +431,8 @@ void __i915_request_submit(struct i915_request *request)
>   	/* Transfer from per-context onto the global per-engine timeline */
>   	move_to_timeline(request, &engine->timeline);
>   
> +	engine->serial++;
> +
>   	trace_i915_request_execute(request);
>   }
>   
> @@ -1146,7 +1148,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
>   	list_add_tail(&rq->ring_link, &ring->request_list);
>   	if (list_is_first(&rq->ring_link, &ring->request_list))
>   		list_add(&ring->active_link, &rq->i915->gt.active_rings);
> -	rq->i915->gt.active_engines |= rq->engine->mask;
>   	rq->emitted_jiffies = jiffies;
>   
>   	/*
> @@ -1418,21 +1419,20 @@ long i915_request_wait(struct i915_request *rq,
>   	return timeout;
>   }
>   
> -void i915_retire_requests(struct drm_i915_private *i915)
> +bool i915_retire_requests(struct drm_i915_private *i915)
>   {
>   	struct intel_ring *ring, *tmp;
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	if (!i915->gt.active_requests)
> -		return;
> -
>   	list_for_each_entry_safe(ring, tmp,
>   				 &i915->gt.active_rings, active_link) {
>   		intel_ring_get(ring); /* last rq holds reference! */
>   		ring_retire_requests(ring);
>   		intel_ring_put(ring);
>   	}
> +
> +	return !list_empty(&i915->gt.active_rings);
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 36f13b74ec58..1eee7416af31 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -425,6 +425,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
>   	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
>   }
>   
> -void i915_retire_requests(struct drm_i915_private *i915);
> +bool i915_retire_requests(struct drm_i915_private *i915);
>   
>   #endif /* I915_REQUEST_H */
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 13f823ff8083..fd9d3b0d9f47 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -466,26 +466,22 @@ void intel_uc_reset_prepare(struct drm_i915_private *i915)
>   	intel_uc_sanitize(i915);
>   }
>   
> -int intel_uc_suspend(struct drm_i915_private *i915)
> +void intel_uc_suspend(struct drm_i915_private *i915)
>   {
>   	struct intel_guc *guc = &i915->guc;
> +	intel_wakeref_t wakeref;
>   	int err;
>   
> -	if (!USES_GUC(i915))
> -		return 0;
> -
>   	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
> -		return 0;
> -
> -	err = intel_guc_suspend(guc);
> -	if (err) {
> -		DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
> -		return err;
> -	}
> +		return;
>   
> -	guc_disable_communication(guc);
> +	with_intel_runtime_pm(i915, wakeref) {
> +		err = intel_guc_suspend(guc);
> +		if (err)
> +			DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
>   
> -	return 0;
> +		guc_disable_communication(guc);
> +	}
>   }
>   
>   int intel_uc_resume(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index c14729786652..c92436b1f1c5 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -39,7 +39,7 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
>   int intel_uc_init(struct drm_i915_private *dev_priv);
>   void intel_uc_fini(struct drm_i915_private *dev_priv);
>   void intel_uc_reset_prepare(struct drm_i915_private *i915);
> -int intel_uc_suspend(struct drm_i915_private *dev_priv);
> +void intel_uc_suspend(struct drm_i915_private *i915);
>   int intel_uc_resume(struct drm_i915_private *dev_priv);
>   
>   static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 6fd70d326468..0342de369d3e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -16,26 +16,18 @@ static int switch_to_context(struct drm_i915_private *i915,
>   {
>   	struct intel_engine_cs *engine;
>   	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -	int err = 0;
> -
> -	wakeref = intel_runtime_pm_get(i915);
>   
>   	for_each_engine(engine, i915, id) {
>   		struct i915_request *rq;
>   
>   		rq = i915_request_alloc(engine, ctx);
> -		if (IS_ERR(rq)) {
> -			err = PTR_ERR(rq);
> -			break;
> -		}
> +		if (IS_ERR(rq))
> +			return PTR_ERR(rq);
>   
>   		i915_request_add(rq);
>   	}
>   
> -	intel_runtime_pm_put(i915, wakeref);
> -
> -	return err;
> +	return 0;
>   }
>   
>   static void trash_stolen(struct drm_i915_private *i915)
> @@ -120,7 +112,7 @@ static void pm_resume(struct drm_i915_private *i915)
>   	 * that runtime-pm just works.
>   	 */
>   	with_intel_runtime_pm(i915, wakeref) {
> -		intel_engines_sanitize(i915, false);
> +		intel_gt_sanitize(i915, false);
>   		i915_gem_sanitize(i915);
>   		i915_gem_resume(i915);
>   	}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 9d646fa1b74e..71d896bbade2 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1608,113 +1608,6 @@ __engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
>   	return "none";
>   }
>   
> -static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
> -					  struct i915_gem_context *ctx,
> -					  intel_engine_mask_t engines)
> -{
> -	struct intel_engine_cs *engine;
> -	intel_engine_mask_t tmp;
> -	int pass;
> -
> -	GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
> -	for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */
> -		bool from_idle = pass & 1;
> -		int err;
> -
> -		if (!from_idle) {
> -			for_each_engine_masked(engine, i915, engines, tmp) {
> -				struct i915_request *rq;
> -
> -				rq = i915_request_alloc(engine, ctx);
> -				if (IS_ERR(rq))
> -					return PTR_ERR(rq);
> -
> -				i915_request_add(rq);
> -			}
> -		}
> -
> -		err = i915_gem_switch_to_kernel_context(i915,
> -							i915->gt.active_engines);
> -		if (err)
> -			return err;
> -
> -		if (!from_idle) {
> -			err = i915_gem_wait_for_idle(i915,
> -						     I915_WAIT_LOCKED,
> -						     MAX_SCHEDULE_TIMEOUT);
> -			if (err)
> -				return err;
> -		}
> -
> -		if (i915->gt.active_requests) {
> -			pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n",
> -			       i915->gt.active_requests,
> -			       pass, from_idle ? "idle" : "busy",
> -			       __engine_name(i915, engines),
> -			       is_power_of_2(engines) ? "" : "s");
> -			return -EINVAL;
> -		}
> -
> -		/* XXX Bonus points for proving we are the kernel context! */
> -
> -		mutex_unlock(&i915->drm.struct_mutex);
> -		drain_delayed_work(&i915->gem.idle_work);
> -		mutex_lock(&i915->drm.struct_mutex);
> -	}
> -
> -	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> -		return -EIO;
> -
> -	return 0;
> -}
> -
> -static int igt_switch_to_kernel_context(void *arg)
> -{
> -	struct drm_i915_private *i915 = arg;
> -	struct intel_engine_cs *engine;
> -	struct i915_gem_context *ctx;
> -	enum intel_engine_id id;
> -	intel_wakeref_t wakeref;
> -	int err;
> -
> -	/*
> -	 * A core premise of switching to the kernel context is that
> -	 * if an engine is already idling in the kernel context, we
> -	 * do not emit another request and wake it up. The other being
> -	 * that we do indeed end up idling in the kernel context.
> -	 */
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	wakeref = intel_runtime_pm_get(i915);
> -
> -	ctx = kernel_context(i915);
> -	if (IS_ERR(ctx)) {
> -		mutex_unlock(&i915->drm.struct_mutex);
> -		return PTR_ERR(ctx);
> -	}
> -
> -	/* First check idling each individual engine */
> -	for_each_engine(engine, i915, id) {
> -		err = __igt_switch_to_kernel_context(i915, ctx, BIT(id));
> -		if (err)
> -			goto out_unlock;
> -	}
> -
> -	/* Now en masse */
> -	err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES);
> -	if (err)
> -		goto out_unlock;
> -
> -out_unlock:
> -	GEM_TRACE_DUMP_ON(err);
> -
> -	intel_runtime_pm_put(i915, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
> -	kernel_context_close(ctx);
> -	return err;
> -}
> -
>   static void mock_barrier_task(void *data)
>   {
>   	unsigned int *counter = data;
> @@ -1729,7 +1622,6 @@ static int mock_context_barrier(void *arg)
>   	struct drm_i915_private *i915 = arg;
>   	struct i915_gem_context *ctx;
>   	struct i915_request *rq;
> -	intel_wakeref_t wakeref;
>   	unsigned int counter;
>   	int err;
>   
> @@ -1772,9 +1664,7 @@ static int mock_context_barrier(void *arg)
>   		goto out;
>   	}
>   
> -	rq = ERR_PTR(-ENODEV);
> -	with_intel_runtime_pm(i915, wakeref)
> -		rq = i915_request_alloc(i915->engine[RCS0], ctx);
> +	rq = i915_request_alloc(i915->engine[RCS0], ctx);
>   	if (IS_ERR(rq)) {
>   		pr_err("Request allocation failed!\n");
>   		goto out;
> @@ -1824,7 +1714,6 @@ static int mock_context_barrier(void *arg)
>   int i915_gem_context_mock_selftests(void)
>   {
>   	static const struct i915_subtest tests[] = {
> -		SUBTEST(igt_switch_to_kernel_context),
>   		SUBTEST(mock_context_barrier),
>   	};
>   	struct drm_i915_private *i915;
> @@ -1843,7 +1732,6 @@ int i915_gem_context_mock_selftests(void)
>   int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>   {
>   	static const struct i915_subtest tests[] = {
> -		SUBTEST(igt_switch_to_kernel_context),
>   		SUBTEST(live_nop_switch),
>   		SUBTEST(igt_ctx_exec),
>   		SUBTEST(igt_ctx_readonly),
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 12203d665a4e..088b2aa05dcd 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -24,6 +24,7 @@
>   
>   #include "../i915_selftest.h"
>   
> +#include "igt_flush_test.h"
>   #include "mock_gem_device.h"
>   #include "huge_gem_object.h"
>   
> @@ -505,19 +506,23 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>   {
>   	i915_gem_shrinker_unregister(i915);
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -	if (!i915->gt.active_requests++) {
> -		intel_wakeref_t wakeref;
> -
> -		with_intel_runtime_pm(i915, wakeref)
> -			i915_gem_unpark(i915);
> -	}
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	intel_gt_pm_get(i915);
>   
>   	cancel_delayed_work_sync(&i915->gem.retire_work);
>   	cancel_delayed_work_sync(&i915->gem.idle_work);
>   }
>   
> +static void restore_retire_worker(struct drm_i915_private *i915)
> +{
> +	intel_gt_pm_put(i915);
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	igt_flush_test(i915, I915_WAIT_LOCKED);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	i915_gem_shrinker_register(i915);
> +}
> +
>   static int igt_mmap_offset_exhaustion(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> @@ -615,13 +620,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
>   out:
>   	drm_mm_remove_node(&resv);
>   out_park:
> -	mutex_lock(&i915->drm.struct_mutex);
> -	if (--i915->gt.active_requests)
> -		queue_delayed_work(i915->wq, &i915->gem.retire_work, 0);
> -	else
> -		queue_delayed_work(i915->wq, &i915->gem.idle_work, 0);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -	i915_gem_shrinker_register(i915);
> +	restore_retire_worker(i915);
>   	return err;
>   err_obj:
>   	i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> index 94aee4071a66..e42f3c58536a 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> @@ -11,23 +11,29 @@
>   
>   int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
>   {
> +	int ret = i915_terminally_wedged(i915) ? -EIO : 0;
> +	int repeat = !!(flags & I915_WAIT_LOCKED);
> +
>   	cond_resched();
>   
> -	if (flags & I915_WAIT_LOCKED &&
> -	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
> -		pr_err("Failed to switch back to kernel context; declaring wedged\n");
> -		i915_gem_set_wedged(i915);
> -	}
> +	do {
> +		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
> +			pr_err("%pS timed out, cancelling all further testing.\n",
> +			       __builtin_return_address(0));
>   
> -	if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
> -		pr_err("%pS timed out, cancelling all further testing.\n",
> -		       __builtin_return_address(0));
> +			GEM_TRACE("%pS timed out.\n",
> +				  __builtin_return_address(0));
> +			GEM_TRACE_DUMP();
>   
> -		GEM_TRACE("%pS timed out.\n", __builtin_return_address(0));
> -		GEM_TRACE_DUMP();
> +			i915_gem_set_wedged(i915);
> +			repeat = 0;
> +			ret = -EIO;
> +		}
>   
> -		i915_gem_set_wedged(i915);
> -	}
> +		/* Ensure we also flush after wedging. */
> +		if (flags & I915_WAIT_LOCKED)
> +			i915_retire_requests(i915);
> +	} while (repeat--);
>   
> -	return i915_terminally_wedged(i915);
> +	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index fb677b4019a0..c072424c6b7c 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -41,11 +41,10 @@ void mock_device_flush(struct drm_i915_private *i915)
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	for_each_engine(engine, i915, id)
> -		mock_engine_flush(engine);
> -
> -	i915_retire_requests(i915);
> -	GEM_BUG_ON(i915->gt.active_requests);
> +	do {
> +		for_each_engine(engine, i915, id)
> +			mock_engine_flush(engine);
> +	} while (i915_retire_requests(i915));
>   }
>   
>   static void mock_device_release(struct drm_device *dev)
> @@ -110,10 +109,6 @@ static void mock_retire_work_handler(struct work_struct *work)
>   
>   static void mock_idle_work_handler(struct work_struct *work)
>   {
> -	struct drm_i915_private *i915 =
> -		container_of(work, typeof(*i915), gem.idle_work.work);
> -
> -	i915->gt.active_engines = 0;
>   }
>   
>   static int pm_domain_resume(struct device *dev)
> @@ -185,6 +180,8 @@ struct drm_i915_private *mock_gem_device(void)
>   
>   	mock_uncore_init(&i915->uncore);
>   	i915_gem_init__mm(i915);
> +	intel_gt_pm_init(i915);
> +	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
>   
>   	init_waitqueue_head(&i915->gpu_error.wait_queue);
>   	init_waitqueue_head(&i915->gpu_error.reset_queue);
> 
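
Stepping back from the hunks above: the quoted patch replaces the
open-coded park/unpark paths with an intel_wakeref plus a blocking
notifier chain, and GEM's pm_notifier reacts to INTEL_GT_UNPARK /
INTEL_GT_PARK to queue its retire and idle workers. Below is a minimal
sketch of a hypothetical additional client of that chain, using only
the symbols visible in the diff; the handler body and the example_*
names are illustrative, not part of the series.

/* Hypothetical client of the GT pm notifier chain (illustrative only). */
static int example_gt_pm_notifier(struct notifier_block *nb,
				  unsigned long action, void *data)
{
	switch (action) {
	case INTEL_GT_UNPARK:
		/* First wakeref taken: the GT is powering up. */
		break;

	case INTEL_GT_PARK:
		/* Last wakeref dropped: the GT is about to idle. */
		break;
	}

	return NOTIFY_OK;
}

static void example_register(struct drm_i915_private *i915,
			     struct notifier_block *nb)
{
	nb->notifier_call = example_gt_pm_notifier;
	blocking_notifier_chain_register(&i915->gt.pm_notifications, nb);
}

Unregistering would go through blocking_notifier_chain_unregister() on
the same chain, mirroring how i915_gem_init__pm() registers its
notifier in the patch.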

* Re: [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/
  2019-04-23  9:40       ` Jani Nikula
@ 2019-04-23 16:46         ` Rodrigo Vivi
  0 siblings, 0 replies; 68+ messages in thread
From: Rodrigo Vivi @ 2019-04-23 16:46 UTC (permalink / raw)
  To: Jani Nikula; +Cc: intel-gfx

On Tue, Apr 23, 2019 at 12:40:10PM +0300, Jani Nikula wrote:
> 
> I'll want two things:
> 
> * Explicit ack from Rodrigo too


Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

(sorry for being late here)

> 
> * The dependencies merged first, and this one posted as a single
>   patch. I really want this to stand out better, instead of being
>   semi-hidden in the middle of a 30+ patch series.

+1.

> 
> 
> Acked-by: Jani Nikula <jani.nikula@intel.com>
> 
> 
> On Tue, 23 Apr 2019, Joonas Lahtinen <joonas.lahtinen@linux.intel.com> wrote:
> > Quoting Joonas Lahtinen (2019-04-18 15:04:49)
> >> + Jani and Rodrigo to comment
> >
> > No objection here and drm-intel-next was freshly tagged, so this is:
> >
> > Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >
> > Regards, Joonas
> >
> >> 
> >> I'm definitely all for doing this, so it's only a matter of timing.
> >> 
> >> The question is, do we want to do it right now after the last
> >> drm-intel-next was tagged, or wait a couple of release candidates?
> >> 
> >> I'm leaning towards doing this ASAP, as git cherry-pick should
> >> understand that they're just renames, so there should be no issue with
> >> doing the -fixes.
> >> 
> >> Regards, Joonas
> >> 
> >> Quoting Chris Wilson (2019-04-17 10:56:32)
> >> > Start partitioning off the code that talks to the hardware (GT) from the
> >> > uapi layers and move the device facing code under gt/
> >> > 
> >> > One casualty is s/intel_ringbuffer.h/intel_engine.h/ with the plan to
> >> > subdivide that header and body further (and split out the submission
> >> > code from the ringbuffer and logical context handling). This patch aims
> >> > to be simple motion so git can fixup inflight patches with little mess.
> >> > 
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center

* Re: [PATCH 06/32] drm/i915: Store the default sseu setup on the engine
  2019-04-17  9:40   ` Tvrtko Ursulin
@ 2019-04-24  9:45     ` Chris Wilson
  0 siblings, 0 replies; 68+ messages in thread
From: Chris Wilson @ 2019-04-24  9:45 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-04-17 10:40:26)
> 
> On 17/04/2019 08:56, Chris Wilson wrote:
> > +/*
> > + * Powergating configuration for a particular (context,engine).
> > + */
> > +struct intel_sseu {
> > +     u8 slice_mask;
> > +     u8 subslice_mask;
> > +     u8 min_eus_per_subslice;
> > +     u8 max_eus_per_subslice;
> > +};
> > +
> > +static inline struct intel_sseu
> > +intel_device_default_sseu(const struct sseu_dev_info *sseu)
> 
> As said before, if you rename this function to intel_device_sseu, 
> intel_convert_device_sseu, or something, I would be fine with the patch. 
> Basically I object to the "default" in the name, since the sseu is 
> passed in and could be any value, not just the device default.

I didn't understand that was the objection. I thought you were
complaining about storing the default on the engine rather than pulling
it from i915->runtime_info.
-Chris
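
For concreteness, the rename under discussion might look something like
the sketch below. The function name is only one possible spelling of
the suggestion, and the sseu_dev_info field mapping is assumed from
context, so treat this as illustrative rather than as what was merged.

static inline struct intel_sseu
intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
{
	/* Field mapping assumed; the device case enables all EUs (min == max). */
	return (struct intel_sseu) {
		.slice_mask = sseu->slice_mask,
		.subslice_mask = sseu->subslice_mask[0],
		.min_eus_per_subslice = sseu->max_eus_per_subslice,
		.max_eus_per_subslice = sseu->max_eus_per_subslice,
	};
}

With a neutral name like this, callers converting any sseu_dev_info
(default or otherwise) read naturally, which is the substance of the
objection above.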

Thread overview: 68+ messages
2019-04-17  7:56 [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
2019-04-17  7:56 ` [PATCH 02/32] drm/i915: Verify workarounds immediately after application Chris Wilson
2019-04-17  7:56 ` [PATCH 03/32] drm/i915: Verify the engine workarounds stick on application Chris Wilson
2019-04-17  7:56 ` [PATCH 04/32] drm/i915: Make workaround verification *optional* Chris Wilson
2019-04-17  9:37   ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 05/32] drm/i915/selftests: Verify whitelist of context registers Chris Wilson
2019-04-17  7:56 ` [PATCH 06/32] drm/i915: Store the default sseu setup on the engine Chris Wilson
2019-04-17  9:40   ` Tvrtko Ursulin
2019-04-24  9:45     ` Chris Wilson
2019-04-17  7:56 ` [PATCH 07/32] drm/i915: Move GraphicsTechnology files under gt/ Chris Wilson
2019-04-17  9:42   ` Tvrtko Ursulin
2019-04-18 12:04   ` Joonas Lahtinen
2019-04-23  8:57     ` Joonas Lahtinen
2019-04-23  9:40       ` Jani Nikula
2019-04-23 16:46         ` Rodrigo Vivi
2019-04-17  7:56 ` [PATCH 08/32] drm/i915: Introduce struct intel_wakeref Chris Wilson
2019-04-17  9:45   ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 09/32] drm/i915: Pull the GEM powermangement coupling into its own file Chris Wilson
2019-04-17  7:56 ` [PATCH 10/32] drm/i915: Introduce context->enter() and context->exit() Chris Wilson
2019-04-17  7:56 ` [PATCH 11/32] drm/i915: Pass intel_context to i915_request_create() Chris Wilson
2019-04-17  7:56 ` [PATCH 12/32] drm/i915: Invert the GEM wakeref hierarchy Chris Wilson
2019-04-18 12:42   ` Tvrtko Ursulin
2019-04-18 13:07     ` Chris Wilson
2019-04-18 13:22       ` Chris Wilson
2019-04-23 13:02   ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 13/32] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
2019-04-17  7:56 ` [PATCH 14/32] drm/i915: Explicitly pin the logical context for execbuf Chris Wilson
2019-04-17  7:56 ` [PATCH 15/32] drm/i915: Export intel_context_instance() Chris Wilson
2019-04-17  7:56 ` [PATCH 16/32] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
2019-04-17  7:56 ` [PATCH 17/32] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
2019-04-17  7:56 ` [PATCH 18/32] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
2019-04-17  7:56 ` [PATCH 19/32] drm/i915: Split engine setup/init into two phases Chris Wilson
2019-04-17  7:56 ` [PATCH 20/32] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
2019-04-17  7:56 ` [PATCH 21/32] drm/i915: Remove intel_context.active_link Chris Wilson
2019-04-17  9:47   ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 22/32] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
2019-04-17  7:56 ` [PATCH 23/32] drm/i915: Allow multiple user handles to the same VM Chris Wilson
2019-04-17  7:56 ` [PATCH 24/32] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
2019-04-17  7:56 ` [PATCH 25/32] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-04-17  9:50   ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 26/32] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
2019-04-17  7:56 ` [PATCH 27/32] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
2019-04-17  9:50   ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 28/32] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-04-17 11:26   ` Tvrtko Ursulin
2019-04-17 13:51     ` Chris Wilson
2019-04-17  7:56 ` [PATCH 29/32] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
2019-04-17 11:43   ` Tvrtko Ursulin
2019-04-17 11:57     ` Chris Wilson
2019-04-17 12:35       ` Tvrtko Ursulin
2019-04-17 12:46         ` Chris Wilson
2019-04-17 13:32           ` Tvrtko Ursulin
2019-04-18  7:24             ` Chris Wilson
2019-04-17  7:56 ` [PATCH 30/32] drm/i915: Extend execution fence to support a callback Chris Wilson
2019-04-17  7:56 ` [PATCH 31/32] drm/i915/execlists: Virtual engine bonding Chris Wilson
2019-04-18  6:47   ` Tvrtko Ursulin
2019-04-18  6:57     ` Chris Wilson
2019-04-18  8:57       ` Tvrtko Ursulin
2019-04-18  9:13         ` Chris Wilson
2019-04-18  9:50           ` Tvrtko Ursulin
2019-04-18  9:59             ` Chris Wilson
2019-04-18 10:24               ` Tvrtko Ursulin
2019-04-17  7:56 ` [PATCH 32/32] drm/i915: Allow specification of parallel execbuf Chris Wilson
2019-04-17  8:46 ` [PATCH 01/32] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
2019-04-17 11:33 ` ✗ Fi.CI.BAT: failure for series starting with [01/32] " Patchwork
2019-04-18 10:32 ` [PATCH 01/32] " Tvrtko Ursulin
2019-04-18 10:40   ` Chris Wilson
2019-04-23 12:59 ` Tvrtko Ursulin
