* [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore
@ 2019-10-02 11:19 Chris Wilson
  2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
                   ` (34 more replies)
  0 siblings, 35 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

If execlists' lite-restore is based on the common GEM context tag
rather than the per-intel_context LRCA, then a context switch between
two intel_contexts on the same engine derived from the same GEM context
will perform a lite-restore instead of a full context switch. We can
exploit this by poisoning the ringbuffer of the first context and trying
to trick the hardware into a simple RING_TAIL update (i.e. a lite-restore).
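
Schematically, the hazard reduces to how the hardware compares
consecutive ELSP submissions (a hedged sketch with hypothetical helper
names, not the driver's actual code):

	if (ctx_tag(new_desc) == ctx_tag(old_desc)) {
		/* Lite-restore: HW samples only the new RING_TAIL and
		 * keeps executing from the *old* ring. If the tag is the
		 * shared GEM context id rather than the per-intel_context
		 * LRCA, two distinct contexts falsely match and the
		 * (poisoned) old ring is executed. */
	} else {
		/* Full context save/restore. */
	}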

v2: Also check what happens if we preempt ce[0] with ce[1] (both
instances on the same engine from the same parent context) [Tvrtko]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    |  18 ++-
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 184 +++++++++++++++++++++++++
 2 files changed, 198 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9877b6ba13df..431d3b8c3371 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -235,6 +235,16 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     const struct intel_ring *ring,
 				     bool close);
 
+static void __context_pin_acquire(struct intel_context *ce)
+{
+	mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_);
+}
+
+static void __context_pin_release(struct intel_context *ce)
+{
+	mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_);
+}
+
 static void mark_eio(struct i915_request *rq)
 {
 	if (!i915_request_signaled(rq))
@@ -2761,7 +2771,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 
 	/* Proclaim we have exclusive access to the context image! */
-	mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_);
+	__context_pin_acquire(ce);
 
 	rq = active_request(rq);
 	if (!rq) {
@@ -2825,7 +2835,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	__execlists_reset_reg_state(ce, engine);
 	__execlists_update_reg_state(ce, engine);
 	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
-	mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_);
+	__context_pin_release(ce);
 
 unwind:
 	/* Push back any incomplete requests for replay after the reset. */
@@ -4439,7 +4449,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 			    bool scrub)
 {
 	GEM_BUG_ON(!intel_context_is_pinned(ce));
-	mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _THIS_IP_);
+	__context_pin_acquire(ce);
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -4465,7 +4475,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 	intel_ring_update_space(ce->ring);
 
 	__execlists_update_reg_state(ce, engine);
-	mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_);
+	__context_pin_release(ce);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 93f2fcdc49bf..2da4fa5149cb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -79,6 +79,188 @@ static int live_sanitycheck(void *arg)
 	return err;
 }
 
+static int live_unlite_restore(struct drm_i915_private *i915, int prio)
+{
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct igt_spinner spin;
+	int err = -ENOMEM;
+
+	/*
+	 * Check that we can correctly context switch between 2 instances
+	 * on the same engine from the same parent context.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	if (igt_spinner_init(&spin, &i915->gt))
+		goto err_unlock;
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	err = 0;
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce[2] = {};
+		struct i915_request *rq[2];
+		struct igt_live_test t;
+		int n;
+
+		if (prio && !intel_engine_has_preemption(engine))
+			continue;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+			err = -EIO;
+			break;
+		}
+
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			struct intel_context *tmp;
+
+			tmp = intel_context_create(ctx, engine);
+			if (IS_ERR(tmp)) {
+				err = PTR_ERR(tmp);
+				goto err_ce;
+			}
+
+			err = intel_context_pin(tmp);
+			if (err) {
+				intel_context_put(tmp);
+				goto err_ce;
+			}
+
+			/*
+			 * Setup the pair of contexts such that if we
+			 * lite-restore using the RING_TAIL from ce[1] it
+			 * will execute garbage from ce[0]->ring.
+			 */
+			memset(tmp->ring->vaddr,
+			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+			       tmp->ring->vma->size);
+
+			ce[n] = tmp;
+		}
+		GEM_BUG_ON(!ce[1]->ring->size);
+		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+
+		local_bh_disable(); /* appease lockdep */
+		__context_pin_acquire(ce[1]);
+		__execlists_update_reg_state(ce[1], engine);
+		__context_pin_release(ce[1]);
+		local_bh_enable();
+
+		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto err_ce;
+		}
+
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+		GEM_BUG_ON(rq[0]->tail > ce[1]->ring->emit);
+
+		if (!igt_wait_for_spinner(&spin, rq[0])) {
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		rq[1] = i915_request_create(ce[1]);
+		if (IS_ERR(rq[1])) {
+			err = PTR_ERR(rq[1]);
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		if (!prio) {
+			/*
+			 * Ensure we do the switch to ce[1] on completion.
+			 *
+			 * rq[0] is already submitted, so this should reduce
+			 * to a no-op (a wait on a request on the same engine
+			 * uses the submit fence, not the completion fence),
+			 * but it will install a dependency on rq[1] for rq[0]
+			 * that will prevent the pair being reordered by
+			 * timeslicing.
+			 */
+			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+		}
+
+		i915_request_get(rq[1]);
+		i915_request_add(rq[1]);
+		GEM_TRACE("rq[0]:{ head:%x, tail:%x }, rq[1]:{ head:%x, tail:%x }\n",
+			  rq[0]->head, rq[0]->tail,
+			  rq[1]->head, rq[1]->tail);
+		GEM_BUG_ON(rq[1]->tail <= rq[0]->tail);
+		i915_request_put(rq[0]);
+
+		if (prio) {
+			struct i915_sched_attr attr = {
+				.priority = prio,
+			};
+
+			/* Alternatively preempt the spinner with ce[1] */
+			engine->schedule(rq[1], &attr);
+		}
+
+		/* And switch back to ce[0] for good measure */
+		rq[0] = i915_request_create(ce[0]);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			i915_request_put(rq[1]);
+			goto err_ce;
+		}
+
+		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+		i915_request_add(rq[0]);
+		GEM_TRACE("rq[1]:{ head:%x, tail:%x }, rq[0]:{ head:%x, tail:%x }\n",
+			  rq[1]->head, rq[1]->tail,
+			  rq[0]->head, rq[0]->tail);
+		GEM_BUG_ON(rq[0]->tail > rq[1]->tail);
+		i915_request_put(rq[1]);
+
+err_ce:
+		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+		igt_spinner_end(&spin);
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			if (IS_ERR_OR_NULL(ce[n]))
+				break;
+
+			intel_context_unpin(ce[n]);
+			intel_context_put(ce[n]);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	kernel_context_close(ctx);
+err_spin:
+	igt_spinner_fini(&spin);
+err_unlock:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_unlite_switch(void *arg)
+{
+	return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
@@ -2178,6 +2360,8 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
+		SUBTEST(live_unlite_switch),
+		SUBTEST(live_unlite_preempt),
 		SUBTEST(live_timeslice_preempt),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
-- 
2.23.0

* [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
                   ` (33 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Make dma_fence_enable_sw_signaling() behave like its
dma_fence_add_callback() and dma_fence_default_wait() counterparts and
perform the test to enable signaling under the fence->lock, along with
the action to do so. This ensures that should an implementation be trying
to flush the cb_list (by signaling) on retirement before freeing the
fence, it can do so in a race-free manner.

See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked
with dma_fence_signal").
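
For illustration, a retirement path that relies on this serialisation
could then safely flush the callback list before dropping its reference
(a sketch with a hypothetical retire_fence(), not code from this series):

	static void retire_fence(struct dma_fence *fence)
	{
		unsigned long flags;

		spin_lock_irqsave(fence->lock, flags);
		dma_fence_signal_locked(fence); /* runs cb_list under the lock */
		spin_unlock_irqrestore(fence->lock, flags);

		/* dma_fence_enable_sw_signaling() can no longer race in a
		 * late ->enable_signaling() against the signaled fence. */
		dma_fence_put(fence);
	}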

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 2c136aee3e79..587727089134 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -285,19 +285,18 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
 {
 	unsigned long flags;
 
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return;
+
+	spin_lock_irqsave(fence->lock, flags);
 	if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
 			      &fence->flags) &&
-	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
 	    fence->ops->enable_signaling) {
 		trace_dma_fence_enable_signal(fence);
-
-		spin_lock_irqsave(fence->lock, flags);
-
 		if (!fence->ops->enable_signaling(fence))
 			dma_fence_signal_locked(fence);
-
-		spin_unlock_irqrestore(fence->lock, flags);
 	}
+	spin_unlock_irqrestore(fence->lock, flags);
 }
 EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
 
-- 
2.23.0

* [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
  2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 04/30] drm/i915: Only track bound elements of the GTT Chris Wilson
                   ` (32 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

The ulterior motive for switching the booleans over to bitops is to
allow use of the allocated flag as a bitlock.
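
A minimal sketch of that future bitlock usage (the trylock helper below
is hypothetical and not part of this patch):

	static bool drm_mm_node_trylock(struct drm_mm_node *node)
	{
		/* acquire: atomically claim the allocated bit */
		return !test_and_set_bit_lock(DRM_MM_NODE_ALLOCATED_BIT,
					      &node->flags);
	}

	/* Paired release; this is why drm_mm_remove_node() in the diff
	 * switches to clear_bit_unlock() rather than a plain clear. */
	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);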

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_mm.c                      | 36 +++++++++++--------
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++--
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               | 16 ++++-----
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               |  2 +-
 drivers/gpu/drm/i915/i915_vma.h               |  4 +--
 drivers/gpu/drm/selftests/test-drm_mm.c       | 14 ++++----
 drivers/gpu/drm/vc4/vc4_crtc.c                |  2 +-
 drivers/gpu/drm/vc4/vc4_hvs.c                 |  2 +-
 drivers/gpu/drm/vc4/vc4_plane.c               |  4 +--
 include/drm/drm_mm.h                          |  7 ++--
 12 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 4581c5387372..211967006cec 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -174,7 +174,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 
 	node->__subtree_last = LAST(node);
 
-	if (hole_node->allocated) {
+	if (drm_mm_node_allocated(hole_node)) {
 		rb = &hole_node->rb;
 		while (rb) {
 			parent = rb_entry(rb, struct drm_mm_node, rb);
@@ -424,9 +424,9 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 
 	node->mm = mm;
 
+	__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 	list_add(&node->node_list, &hole->node_list);
 	drm_mm_interval_tree_add_node(hole, node);
-	node->allocated = true;
 	node->hole_size = 0;
 
 	rm_hole(hole);
@@ -543,9 +543,9 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 		node->color = color;
 		node->hole_size = 0;
 
+		__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 		list_add(&node->node_list, &hole->node_list);
 		drm_mm_interval_tree_add_node(hole, node);
-		node->allocated = true;
 
 		rm_hole(hole);
 		if (adj_start > hole_start)
@@ -561,6 +561,11 @@ int drm_mm_insert_node_in_range(struct drm_mm * const mm,
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range);
 
+static inline bool drm_mm_node_scanned_block(const struct drm_mm_node *node)
+{
+	return test_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+}
+
 /**
  * drm_mm_remove_node - Remove a memory node from the allocator.
  * @node: drm_mm_node to remove
@@ -574,8 +579,8 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
 
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
 
 	prev_node = list_prev_entry(node, node_list);
 
@@ -584,11 +589,12 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 
 	drm_mm_interval_tree_remove(node, &mm->interval_tree);
 	list_del(&node->node_list);
-	node->allocated = false;
 
 	if (drm_mm_hole_follows(prev_node))
 		rm_hole(prev_node);
 	add_hole(prev_node);
+
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
@@ -605,7 +611,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
 	struct drm_mm *mm = old->mm;
 
-	DRM_MM_BUG_ON(!old->allocated);
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(old));
 
 	*new = *old;
 
@@ -622,8 +628,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 				&mm->holes_addr);
 	}
 
-	old->allocated = false;
-	new->allocated = true;
+	clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags);
 }
 EXPORT_SYMBOL(drm_mm_replace_node);
 
@@ -731,9 +736,9 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
 	u64 adj_start, adj_end;
 
 	DRM_MM_BUG_ON(node->mm != mm);
-	DRM_MM_BUG_ON(!node->allocated);
-	DRM_MM_BUG_ON(node->scanned_block);
-	node->scanned_block = true;
+	DRM_MM_BUG_ON(!drm_mm_node_allocated(node));
+	DRM_MM_BUG_ON(drm_mm_node_scanned_block(node));
+	__set_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
 	mm->scan_active++;
 
 	/* Remove this block from the node_list so that we enlarge the hole
@@ -818,8 +823,7 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 	struct drm_mm_node *prev_node;
 
 	DRM_MM_BUG_ON(node->mm != scan->mm);
-	DRM_MM_BUG_ON(!node->scanned_block);
-	node->scanned_block = false;
+	DRM_MM_BUG_ON(!drm_mm_node_scanned_block(node));
 
 	DRM_MM_BUG_ON(!node->mm->scan_active);
 	node->mm->scan_active--;
@@ -837,6 +841,8 @@ bool drm_mm_scan_remove_block(struct drm_mm_scan *scan,
 		      list_next_entry(node, node_list));
 	list_add(&node->node_list, &prev_node->node_list);
 
+	__clear_bit(DRM_MM_NODE_SCANNED_BIT, &node->flags);
+
 	return (node->start + node->size > scan->hit_start &&
 		node->start < scan->hit_end);
 }
@@ -917,7 +923,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
 	INIT_LIST_HEAD(&mm->head_node.node_list);
-	mm->head_node.allocated = false;
+	mm->head_node.flags = 0;
 	mm->head_node.mm = mm;
 	mm->head_node.start = start + size;
 	mm->head_node.size = -size;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 27dbcb508055..c049199a1df5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -906,7 +906,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
-	cache->node.allocated = false;
+	cache->node.flags = 0;
 	cache->ce = NULL;
 	cache->rq = NULL;
 	cache->rq_size = 0;
@@ -968,7 +968,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
 
-		if (cache->node.allocated) {
+		if (drm_mm_node_allocated(&cache->node)) {
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
@@ -1061,7 +1061,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	}
 
 	offset = cache->node.start;
-	if (cache->node.allocated) {
+	if (drm_mm_node_allocated(&cache->node)) {
 		ggtt->vm.insert_page(&ggtt->vm,
 				     i915_gem_object_get_dma_address(obj, page),
 				     offset, I915_CACHE_NONE, 0);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bb878119f06c..bb4889d2346d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -387,7 +387,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
 {
 	struct drm_mm_node *node = &ggtt->uc_fw;
 
-	GEM_BUG_ON(!node->allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(node));
 	GEM_BUG_ON(upper_32_bits(node->start));
 	GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1426e506700d..7046067f70c1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -351,12 +351,12 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_unlock;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -393,7 +393,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		unsigned page_offset = offset_in_page(offset);
 		unsigned page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			ggtt->vm.insert_page(&ggtt->vm,
 					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
 					     node.start, I915_CACHE_NONE, 0);
@@ -415,7 +415,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
 	mutex_lock(&i915->drm.struct_mutex);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 		remove_mappable_node(&node);
 	} else {
@@ -561,12 +561,12 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
-		node.allocated = false;
+		node.flags = 0;
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out_rpm;
-		GEM_BUG_ON(!node.allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -604,7 +604,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		unsigned int page_offset = offset_in_page(offset);
 		unsigned int page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
-		if (node.allocated) {
+		if (drm_mm_node_allocated(&node)) {
 			/* flush the write before we modify the GGTT */
 			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 			ggtt->vm.insert_page(&ggtt->vm,
@@ -636,7 +636,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 out_unpin:
 	mutex_lock(&i915->drm.struct_mutex);
 	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
-	if (node.allocated) {
+	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 		remove_mappable_node(&node);
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index e76c9da9992d..8c1e04f402bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -299,7 +299,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 			break;
 		}
 
-		GEM_BUG_ON(!node->allocated);
+		GEM_BUG_ON(!drm_mm_node_allocated(node));
 		vma = container_of(node, typeof(*vma), node);
 
 		/* If we are using coloring to insert guard pages between
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 9d5b0f87c210..68c34b1a20e4 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -795,7 +795,7 @@ void i915_vma_reopen(struct i915_vma *vma)
 
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(vma->node.allocated);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->fence);
 
 	mutex_lock(&vma->vm->mutex);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8bcb5812c446..e49b199f7de7 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -228,7 +228,7 @@ static inline bool i915_vma_is_closed(const struct i915_vma *vma)
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-	GEM_BUG_ON(!vma->node.allocated);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(upper_32_bits(vma->node.start));
 	GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1));
 	return lower_32_bits(vma->node.start);
@@ -390,7 +390,7 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma,
 static inline bool i915_node_color_differs(const struct drm_mm_node *node,
 					   unsigned long color)
 {
-	return node->allocated && node->color != color;
+	return drm_mm_node_allocated(node) && node->color != color;
 }
 
 /**
diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c
index 388f9844f4ba..9aabe82dcd3a 100644
--- a/drivers/gpu/drm/selftests/test-drm_mm.c
+++ b/drivers/gpu/drm/selftests/test-drm_mm.c
@@ -854,7 +854,7 @@ static bool assert_contiguous_in_range(struct drm_mm *mm,
 
 	if (start > 0) {
 		node = __drm_mm_interval_first(mm, 0, start - 1);
-		if (node->allocated) {
+		if (drm_mm_node_allocated(node)) {
 			pr_err("node before start: node=%llx+%llu, start=%llx\n",
 			       node->start, node->size, start);
 			return false;
@@ -863,7 +863,7 @@ static bool assert_contiguous_in_range(struct drm_mm *mm,
 
 	if (end < U64_MAX) {
 		node = __drm_mm_interval_first(mm, end, U64_MAX);
-		if (node->allocated) {
+		if (drm_mm_node_allocated(node)) {
 			pr_err("node after end: node=%llx+%llu, end=%llx\n",
 			       node->start, node->size, end);
 			return false;
@@ -1156,12 +1156,12 @@ static void show_holes(const struct drm_mm *mm, int count)
 		struct drm_mm_node *next = list_next_entry(hole, node_list);
 		const char *node1 = NULL, *node2 = NULL;
 
-		if (hole->allocated)
+		if (drm_mm_node_allocated(hole))
 			node1 = kasprintf(GFP_KERNEL,
 					  "[%llx + %lld, color=%ld], ",
 					  hole->start, hole->size, hole->color);
 
-		if (next->allocated)
+		if (drm_mm_node_allocated(next))
 			node2 = kasprintf(GFP_KERNEL,
 					  ", [%llx + %lld, color=%ld]",
 					  next->start, next->size, next->color);
@@ -1900,18 +1900,18 @@ static void separate_adjacent_colors(const struct drm_mm_node *node,
 				     u64 *start,
 				     u64 *end)
 {
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		++*start;
 
 	node = list_next_entry(node, node_list);
-	if (node->allocated && node->color != color)
+	if (drm_mm_node_allocated(node) && node->color != color)
 		--*end;
 }
 
 static bool colors_abutt(const struct drm_mm_node *node)
 {
 	if (!drm_mm_hole_follows(node) &&
-	    list_next_entry(node, node_list)->allocated) {
+	    drm_mm_node_allocated(list_next_entry(node, node_list))) {
 		pr_err("colors abutt; %ld [%llx + %llx] is next to %ld [%llx + %llx]!\n",
 		       node->color, node->start, node->size,
 		       list_next_entry(node, node_list)->color,
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index f1f0a7c87771..b00e20f5ce05 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -994,7 +994,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
 	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
 	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
 
-	if (vc4_state->mm.allocated) {
+	if (drm_mm_node_allocated(&vc4_state->mm)) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 9936b15d0bf1..5a43659da319 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -315,7 +315,7 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 
-	if (vc4->hvs->mitchell_netravali_filter.allocated)
+	if (drm_mm_node_allocated(&vc4->hvs->mitchell_netravali_filter))
 		drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
 
 	drm_mm_takedown(&vc4->hvs->dlist_mm);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 5e5f90810aca..4934127f0d76 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -178,7 +178,7 @@ static void vc4_plane_destroy_state(struct drm_plane *plane,
 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
-	if (vc4_state->lbm.allocated) {
+	if (drm_mm_node_allocated(&vc4_state->lbm)) {
 		unsigned long irqflags;
 
 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
@@ -557,7 +557,7 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
 	/* Allocate the LBM memory that the HVS will use for temporary
 	 * storage due to our scaling/format conversion.
 	 */
-	if (!vc4_state->lbm.allocated) {
+	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
 		int ret;
 
 		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 2c3bbb43c7d1..d7939c054259 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -168,8 +168,9 @@ struct drm_mm_node {
 	struct rb_node rb_hole_addr;
 	u64 __subtree_last;
 	u64 hole_size;
-	bool allocated : 1;
-	bool scanned_block : 1;
+	unsigned long flags;
+#define DRM_MM_NODE_ALLOCATED_BIT	0
+#define DRM_MM_NODE_SCANNED_BIT		1
 #ifdef CONFIG_DRM_DEBUG_MM
 	depot_stack_handle_t stack;
 #endif
@@ -253,7 +254,7 @@ struct drm_mm_scan {
  */
 static inline bool drm_mm_node_allocated(const struct drm_mm_node *node)
 {
-	return node->allocated;
+	return test_bit(DRM_MM_NODE_ALLOCATED_BIT, &node->flags);
 }
 
 /**
-- 
2.23.0

* [PATCH 04/30] drm/i915: Only track bound elements of the GTT
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
  2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
  2019-10-02 11:19 ` [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
                   ` (31 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

The premise here is simply to avoid having to acquire the vm->mutex
inside vma create/destroy to update the vm->unbound_list, thereby
avoiding some nasty lock recursions later.
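
The recursion being dodged looks roughly like this (an illustrative
call chain, not an observed trace):

	mutex_lock(&vm->mutex)               /* e.g. inside vma create */
	  kmalloc(GFP_KERNEL)
	    -> direct reclaim
	      -> i915 shrinker unbinds a vma
	        -> i915_vma_destroy()
	          -> mutex_lock(&vm->mutex)  /* recursion */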

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 23 ++++---------------
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  5 ----
 drivers/gpu/drm/i915/i915_vma.c               | 12 ++--------
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  2 +-
 5 files changed, 8 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index bfbc3e3daf92..e45eb8721850 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -692,7 +692,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	__i915_vma_set_map_and_fenceable(vma);
 
 	mutex_lock(&ggtt->vm.mutex);
-	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
 	GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e62e9d1a1307..ad9eb2d68f3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -505,19 +505,12 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
-	struct list_head *phases[] = {
-		&vm->bound_list,
-		&vm->unbound_list,
-		NULL,
-	}, **phase;
+	struct i915_vma *vma, *vn;
 
 	mutex_lock(&vm->i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		struct i915_vma *vma, *vn;
-
-		list_for_each_entry_safe(vma, vn, *phase, vm_link)
-			i915_vma_destroy(vma);
-	}
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
+		i915_vma_destroy(vma);
+	GEM_BUG_ON(!list_empty(&vm->bound_list));
 	mutex_unlock(&vm->i915->drm.struct_mutex);
 }
 
@@ -528,9 +521,6 @@ static void __i915_vm_release(struct work_struct *work)
 
 	ppgtt_destroy_vma(vm);
 
-	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	GEM_BUG_ON(!list_empty(&vm->unbound_list));
-
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -569,7 +559,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 
 	stash_init(&vm->free_pages);
 
-	INIT_LIST_HEAD(&vm->unbound_list);
 	INIT_LIST_HEAD(&vm->bound_list);
 }
 
@@ -1887,10 +1876,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	INIT_LIST_HEAD(&vma->obj_link);
 	INIT_LIST_HEAD(&vma->closed_link);
 
-	mutex_lock(&vma->vm->mutex);
-	list_add(&vma->vm_link, &vma->vm->unbound_list);
-	mutex_unlock(&vma->vm->mutex);
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8fd2234ba0bf..bbdc735466c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -320,11 +320,6 @@ struct i915_address_space {
 	 */
 	struct list_head bound_list;
 
-	/**
-	 * List of vma that are not unbound.
-	 */
-	struct list_head unbound_list;
-
 	struct pagestash free_pages;
 
 	/* Global GTT */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 68c34b1a20e4..d097f77890ba 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -218,10 +218,6 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	spin_unlock(&obj->vma.lock);
 
-	mutex_lock(&vm->mutex);
-	list_add(&vma->vm_link, &vm->unbound_list);
-	mutex_unlock(&vm->mutex);
-
 	return vma;
 
 err_vma:
@@ -657,7 +653,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
@@ -685,7 +681,7 @@ i915_vma_remove(struct i915_vma *vma)
 
 	mutex_lock(&vma->vm->mutex);
 	drm_mm_remove_node(&vma->node);
-	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
 
 	/*
@@ -798,10 +794,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->fence);
 
-	mutex_lock(&vma->vm->mutex);
-	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
-
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 0d40e0b42923..84d94eabc7dd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1242,7 +1242,7 @@ static void track_vma_bind(struct i915_vma *vma)
 	vma->pages = obj->mm.pages;
 
 	mutex_lock(&vma->vm->mutex);
-	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
 	mutex_unlock(&vma->vm->mutex);
 }
 
-- 
2.23.0

* [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (2 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 04/30] drm/i915: Only track bound elements of the GTT Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:47   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
                   ` (30 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Since we cannot allocate underneath the vm->mutex (it is used in the
direct-reclaim paths), we need to shift the allocations off into a
mutexless worker with fence recursion prevention. To know when we need
this protection, we mark up the address spaces that do allocate before
insertion. In the future, we may wish to extend the async bind scheme to
more than just allocations.
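
A sketch of the intended consumer (hedged; the real check lands with
the async-bind machinery in the next patch):

	if (bind_flags & vma->vm->bind_async_flags) {
		/* Insertion will allocate page directories: defer the
		 * PTE writes to a preallocated, mutexless worker. */
	} else {
		/* No allocation required: bind synchronously. */
	}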

v2: s/vm->bind_alloc/vm->bind_async_flags/

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
 drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad9eb2d68f3f..8eba63ecdb03 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1502,6 +1502,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 			goto err_free_pd;
 	}
 
+	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
 	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
@@ -1950,6 +1951,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 	ppgtt_init(&ppgtt->base, &i915->gt);
 	ppgtt->base.vm.top = 1;
 
+	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
@@ -2581,6 +2583,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 		goto err_ppgtt;
 
 	ggtt->alias = ppgtt;
+	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
 
 	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
 	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index bbdc735466c1..3502b9c85a8e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -305,6 +305,8 @@ struct i915_address_space {
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 	u64 reserved;		/* size addr space reserved */
 
+	unsigned int bind_async_flags;
+
 	bool closed;
 
 	struct mutex mutex; /* protects vma and our lists */
-- 
2.23.0

* [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (3 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker Chris Wilson
                   ` (29 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do the allocation and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbinds).
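
Schematically, the resulting pin path is (a hedged outline; helper
names here are illustrative):

	work = preallocate_bind_worker();  /* may allocate: outside mutex */
	mutex_lock(&vma->vm->mutex);
	err = i915_vma_insert(vma, size, alignment, flags); /* slot only */
	queue_bind_worker(work, vma);      /* PTE writes behind a fence */
	mutex_unlock(&vma->vm->mutex);
	/* GPU use and later unbinds are ordered against the worker's
	 * fence, never the other way around. */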

In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is,
the asynchronous binding applies only to the vma timeline itself and not
to the pages, as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple: we keep the fence for the vma binding separate,
only wait on it as required, and never add it to the obj->resv itself.
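
Concretely, "only wait on it as required" appears in this patch as
explicit syncs on the vma's own timeline, e.g. in the set-cache-level
hunk below (sketch of the pattern):

	/* Rewriting PTEs must wait for a still-running async bind, but
	 * readers of obj->resv never see the bind fence at all. */
	ret = i915_vma_sync(vma);
	if (ret)
		return ret;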

Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!

v2: Add some commentary, and some helpers to reduce patch churn.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  29 +-
 drivers/gpu/drm/i915/display/intel_dsb.c      |   7 +-
 drivers/gpu/drm/i915/display/intel_fbdev.c    |   8 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |  11 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  20 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  13 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  43 +-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  20 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  33 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   5 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  73 +--
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c    |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_tiling.c    |  27 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  27 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |   9 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  12 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   4 -
 .../drm/i915/gem/selftests/igt_gem_utils.c    |   7 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   5 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  19 +-
 drivers/gpu/drm/i915/gvt/aperture_gm.c        |  12 +-
 drivers/gpu/drm/i915/i915_active.c            |  95 +++-
 drivers/gpu/drm/i915/i915_active.h            |   7 +
 drivers/gpu/drm/i915/i915_active_types.h      |   5 +
 drivers/gpu/drm/i915/i915_drv.c               |   2 -
 drivers/gpu/drm/i915/i915_gem.c               |  83 ++-
 drivers/gpu/drm/i915/i915_gem_evict.c         |  28 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   9 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 109 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  45 +-
 drivers/gpu/drm/i915/i915_perf.c              |  32 +-
 drivers/gpu/drm/i915/i915_vma.c               | 524 ++++++++++++------
 drivers/gpu/drm/i915/i915_vma.h               |  84 +--
 drivers/gpu/drm/i915/selftests/i915_gem.c     |   2 -
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  36 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  58 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   7 +
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   6 +-
 40 files changed, 824 insertions(+), 706 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index d99c59e97568..c64c7045de28 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2079,7 +2079,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	unsigned int pinctl;
 	u32 alignment;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 	if (WARN_ON(!i915_gem_object_is_framebuffer(obj)))
 		return ERR_PTR(-EINVAL);
 
@@ -2163,8 +2162,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
@@ -3065,12 +3062,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 		return false;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	obj = i915_gem_object_create_stolen_for_preallocated(dev_priv,
 							     base_aligned,
 							     base_aligned,
 							     size_aligned);
-	mutex_unlock(&dev->struct_mutex);
 	if (!obj)
 		return false;
 
@@ -3232,13 +3227,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	intel_state->color_plane[0].stride =
 		intel_fb_pitch(fb, 0, intel_state->base.rotation);
 
-	mutex_lock(&dev->struct_mutex);
 	intel_state->vma =
 		intel_pin_and_fence_fb_obj(fb,
 					   &intel_state->view,
 					   intel_plane_uses_fence(intel_state),
 					   &intel_state->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(intel_state->vma)) {
 		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
 			  intel_crtc->pipe, PTR_ERR(intel_state->vma));
@@ -14365,8 +14358,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
  * bits.  Some older platforms need special physical address handling for
  * cursor planes.
  *
- * Must be called with struct_mutex held.
- *
  * Returns 0 on success, negative error code on failure.
  */
 int
@@ -14423,15 +14414,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	if (ret)
 		return ret;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		return ret;
-	}
-
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
@@ -14480,8 +14464,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
  * @old_state: the state from the previous modeset
  *
  * Cleans up a framebuffer that has just been removed from a plane.
- *
- * Must be called with struct_mutex held.
  */
 void
 intel_cleanup_plane_fb(struct drm_plane *plane,
@@ -14497,9 +14479,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 	}
 
 	/* Should only be called after a successful intel_prepare_plane_fb()! */
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_plane_unpin_fb(to_intel_plane_state(old_state));
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 int
@@ -14702,7 +14682,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 			   u32 src_w, u32 src_h,
 			   struct drm_modeset_acquire_ctx *ctx)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct intel_crtc_state *crtc_state =
@@ -14768,13 +14747,9 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	if (ret)
 		goto out_free;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		goto out_free;
-
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state));
 	if (ret)
-		goto out_unlock;
+		goto out_free;
 
 	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
 	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
@@ -14804,8 +14779,6 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 
 	intel_plane_unpin_fb(to_intel_plane_state(old_plane_state));
 
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 out_free:
 	if (new_crtc_state)
 		intel_crtc_destroy_state(crtc, &new_crtc_state->base);
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 0a0a1536ac96..bb5a0e91b370 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -119,9 +119,7 @@ intel_dsb_get(struct intel_crtc *crtc)
 		goto err;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(vma)) {
 		DRM_ERROR("Vma creation failed\n");
 		i915_gem_object_put(obj);
@@ -164,10 +162,7 @@ void intel_dsb_put(struct intel_dsb *dsb)
 		return;
 
 	if (atomic_dec_and_test(&dsb->refcount)) {
-		mutex_lock(&i915->drm.struct_mutex);
-		i915_gem_object_unpin_map(dsb->vma->obj);
-		i915_vma_unpin_and_release(&dsb->vma, 0);
-		mutex_unlock(&i915->drm.struct_mutex);
+		i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP);
 		dsb->cmd_buf = NULL;
 		dsb->free_pos = 0;
 		dsb->ins_start_offset = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 68338669f054..97cde017670a 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		sizes->fb_height = intel_fb->base.height;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
@@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma_flags = flags;
 
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
 
@@ -274,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	if (ifbdev->vma) {
-		mutex_lock(&ifbdev->helper.dev->struct_mutex);
+	if (ifbdev->vma)
 		intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags);
-		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
-	}
 
 	if (ifbdev->fb)
 		drm_framebuffer_remove(&ifbdev->fb->base);
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 5efef9babadb..3f4ac1ee7668 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1303,15 +1303,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	struct i915_vma *vma;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
 	if (obj == NULL)
 		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1332,13 +1328,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
-	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 7f61a8024133..c1fca5728e6e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_add_request(&vma->active, rq);
+	err = __i915_vma_move_to_active(vma, rq);
 	if (err)
 		goto out_request;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f7ba0935ed67..95f8e66e45db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -313,8 +313,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
 	release_hw_id(ctx);
-	if (ctx->vm)
-		i915_vm_put(ctx->vm);
 
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
@@ -379,9 +377,13 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	i915_gem_context_set_closed(ctx);
+
+	if (ctx->vm)
+		i915_vm_close(ctx->vm);
+
 	mutex_lock(&ctx->mutex);
 
-	i915_gem_context_set_closed(ctx);
 	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
@@ -474,7 +476,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 
 	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
 
-	ctx->vm = i915_vm_get(vm);
+	ctx->vm = i915_vm_open(vm);
 	context_apply_all(ctx, __apply_ppgtt, vm);
 
 	return old;
@@ -488,7 +490,7 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 	vm = __set_ppgtt(ctx, vm);
 	if (vm)
-		i915_vm_put(vm);
+		i915_vm_close(vm);
 }
 
 static void __set_timeline(struct intel_timeline **dst,
@@ -953,7 +955,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	if (ret < 0)
 		goto err_unlock;
 
-	i915_vm_get(vm);
+	i915_vm_open(vm);
 
 	args->size = 0;
 	args->value = ret;
@@ -973,7 +975,7 @@ static void set_ppgtt_barrier(void *data)
 	if (INTEL_GEN(old->i915) < 8)
 		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
 
-	i915_vm_put(old);
+	i915_vm_close(old);
 }
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
@@ -1090,8 +1092,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 				   set_ppgtt_barrier,
 				   old);
 	if (err) {
-		i915_vm_put(__set_ppgtt(ctx, old));
-		i915_vm_put(old);
+		i915_vm_close(__set_ppgtt(ctx, old));
+		i915_vm_close(old);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 55c3ab59e3aa..9937b4c341f1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -288,7 +288,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			/* Wait for an earlier async bind, need to rewrite it */
+			ret = i915_vma_sync(vma);
+			if (ret)
+				return ret;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL);
 			if (ret)
 				return ret;
 		}
@@ -391,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		goto out;
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret == 0) {
 		ret = i915_gem_object_set_cache_level(obj, level);
 		i915_gem_object_unlock(obj);
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 out:
 	i915_gem_object_put(obj);
@@ -485,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
+		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
 		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c049199a1df5..88a881be12ec 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -698,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb)
 
 		case 1:
 			/* Too fragmented, unbind everything and retry */
+			mutex_lock(&eb->context->vm->mutex);
 			err = i915_gem_evict_vm(eb->context->vm);
+			mutex_unlock(&eb->context->vm->mutex);
 			if (err)
 				return err;
 			break;
@@ -972,7 +974,9 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			ggtt->vm.clear_range(&ggtt->vm,
 					     cache->node.start,
 					     cache->node.size);
+			mutex_lock(&ggtt->vm.mutex);
 			drm_mm_remove_node(&cache->node);
+			mutex_unlock(&ggtt->vm.mutex);
 		} else {
 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 		}
@@ -1047,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 					       PIN_NOEVICT);
 		if (IS_ERR(vma)) {
 			memset(&cache->node, 0, sizeof(cache->node));
+			mutex_lock(&ggtt->vm.mutex);
 			err = drm_mm_insert_node_in_range
 				(&ggtt->vm.mm, &cache->node,
 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				 0, ggtt->mappable_end,
 				 DRM_MM_INSERT_LOW);
+			mutex_unlock(&ggtt->vm.mutex);
 			if (err) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
@@ -1416,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 		    IS_GEN(eb->i915, 6)) {
 			err = i915_vma_bind(target, target->obj->cache_level,
-					    PIN_GLOBAL);
+					    PIN_GLOBAL, NULL);
 			if (WARN_ONCE(err,
 				      "Unexpected failure to bind target VMA!"))
 				return err;
@@ -2140,35 +2146,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce)
 	return i915_request_get(rq);
 }
 
-static int
-__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
-	int err;
-
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
-		return 0;
-
-	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
-	if (err)
-		return err;
-
-	err = __intel_context_do_pin(ce);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-
-	return err;
-}
-
-static void
-__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
-{
-	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
-		return;
-
-	mutex_lock(&eb->i915->drm.struct_mutex);
-	intel_context_unpin(ce);
-	mutex_unlock(&eb->i915->drm.struct_mutex);
-}
-
 static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	struct intel_timeline *tl;
@@ -2188,7 +2165,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = __eb_pin_context(eb, ce);
+	err = intel_context_pin(ce);
 	if (err)
 		return err;
 
@@ -2232,7 +2209,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	intel_context_exit(ce);
 	intel_context_timeline_unlock(tl);
 err_unpin:
-	__eb_unpin_context(eb, ce);
+	intel_context_unpin(ce);
 	return err;
 }
 
@@ -2245,7 +2222,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	__eb_unpin_context(eb, ce);
+	intel_context_unpin(ce);
 }
 
 static unsigned int
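
[Aside: the deleted __eb_pin_context()/__eb_unpin_context() pair
open-coded a common pin idiom: an atomic fast path for already-pinned
contexts plus a mutex-guarded slow path for the first pin. After this
patch intel_context_pin() is expected to provide the same behaviour
internally, without struct_mutex. A sketch of the idiom; first_pin() is
a hypothetical stand-in:

static int example_pin(struct intel_context *ce)
{
	int err = 0;

	/* Fast path: no lock if someone else already holds a pin. */
	if (likely(atomic_inc_not_zero(&ce->pin_count)))
		return 0;

	/* Slow path: serialise the first pin against the last unpin. */
	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		return err;

	if (!atomic_read(&ce->pin_count))
		err = first_pin(ce); /* hypothetical: map context state */
	if (!err)
		atomic_inc(&ce->pin_count);

	mutex_unlock(&ce->pin_mutex);
	return err;
}
]
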
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index dd0c2840ba4d..c19431d609fc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -249,16 +249,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_rpm;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
 	/* Now pin it into the GTT as needed */
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
@@ -291,7 +281,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto err_unlock;
+		goto err_reset;
+	}
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unpin;
 	}
 
 	ret = i915_vma_pin_fence(vma);
@@ -329,8 +325,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	i915_vma_unpin_fence(vma);
 err_unpin:
 	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
 err_reset:
 	intel_gt_reset_unlock(ggtt->vm.gt, srcu);
 err_rpm:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 0ef60dae23a7..dbf9be9a79f4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
 		trace_i915_gem_object_destroy(obj);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
-			i915_vma_destroy(vma);
+		if (!list_empty(&obj->vma.list)) {
+			struct i915_vma *vma;
+
+			/*
+			 * Note that the vma keeps an object reference while
+			 * it is active, so it *should* not sleep while we
+			 * destroy it. Our debug code insists it *might*.
+			 * For the moment, play along.
+			 */
+			spin_lock(&obj->vma.lock);
+			while ((vma = list_first_entry_or_null(&obj->vma.list,
+							       struct i915_vma,
+							       obj_link))) {
+				GEM_BUG_ON(vma->obj != obj);
+				spin_unlock(&obj->vma.lock);
+
+				i915_vma_destroy(vma);
+
+				spin_lock(&obj->vma.lock);
+			}
+			spin_unlock(&obj->vma.lock);
 		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
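
[Aside: the loop above is the standard drain-under-spinlock pattern:
pop the head entry, drop the lock around the destructor (which may
sleep), then reacquire and restart from the head, since the list may
have changed while unlocked. In generic form, with struct foo and
destroy() as placeholders:

spin_lock(&lock);
while ((entry = list_first_entry_or_null(&head, struct foo, link))) {
	spin_unlock(&lock);	/* the destructor may sleep */
	destroy(entry);		/* unlinks entry from the list itself */
	spin_lock(&lock);	/* restart: the head may have changed */
}
spin_unlock(&lock);
]
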
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index aec05f967d9d..a70fbbd8c98d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
 	dma_resv_lock(obj->base.resv, NULL);
 }
 
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
+{
+	return dma_resv_trylock(obj->base.resv);
+}
+
 static inline int
 i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
 {
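
[Aside: the new trylock wrapper gives reclaim-style paths a way to take
the dma_resv lock opportunistically instead of blocking. A hypothetical
caller:

/* Hypothetical shrinker-style use: never block on an object lock. */
if (!i915_gem_object_trylock(obj))
	return false;		/* contended, skip this object */

/* ... unbind or purge under the lock ... */
i915_gem_object_unlock(obj);
]
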
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index d2c05d752909..fd3ce6da8497 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -16,40 +16,6 @@
 
 #include "i915_trace.h"
 
-static bool shrinker_lock(struct drm_i915_private *i915,
-			  unsigned int flags,
-			  bool *unlock)
-{
-	struct mutex *m = &i915->drm.struct_mutex;
-
-	switch (mutex_trylock_recursive(m)) {
-	case MUTEX_TRYLOCK_RECURSIVE:
-		*unlock = false;
-		return true;
-
-	case MUTEX_TRYLOCK_FAILED:
-		*unlock = false;
-		if (flags & I915_SHRINK_ACTIVE &&
-		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
-			*unlock = true;
-		return *unlock;
-
-	case MUTEX_TRYLOCK_SUCCESS:
-		*unlock = true;
-		return true;
-	}
-
-	BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
-	if (!unlock)
-		return;
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static bool swap_available(void)
 {
 	return get_nr_swap_pages() > 0;
@@ -155,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	intel_wakeref_t wakeref = 0;
 	unsigned long count = 0;
 	unsigned long scanned = 0;
-	bool unlock;
-
-	if (!shrinker_lock(i915, shrink, &unlock))
-		return 0;
 
 	/*
 	 * When shrinking the active list, we should also consider active
@@ -268,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	if (shrink & I915_SHRINK_BOUND)
 		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
-	shrinker_unlock(i915, unlock);
-
 	if (nr_scanned)
 		*nr_scanned += scanned;
 	return count;
@@ -339,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
 	unsigned long freed;
-	bool unlock;
 
 	sc->nr_scanned = 0;
 
-	if (!shrinker_lock(i915, 0, &unlock))
-		return SHRINK_STOP;
-
 	freed = i915_gem_shrink(i915,
 				sc->nr_to_scan,
 				&sc->nr_scanned,
 				I915_SHRINK_BOUND |
-				I915_SHRINK_UNBOUND |
-				I915_SHRINK_WRITEBACK);
+				I915_SHRINK_UNBOUND);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_wakeref_t wakeref;
 
@@ -366,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 		}
 	}
 
-	shrinker_unlock(i915, unlock);
-
 	return sc->nr_scanned ? freed : SHRINK_STOP;
 }
 
@@ -384,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	freed_pages = 0;
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_ACTIVE |
 					       I915_SHRINK_BOUND |
 					       I915_SHRINK_UNBOUND |
 					       I915_SHRINK_WRITEBACK);
@@ -419,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
 	intel_wakeref_t wakeref;
-	bool unlock;
-
-	if (!shrinker_lock(i915, 0, &unlock))
-		return NOTIFY_DONE;
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
@@ -439,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		if (!vma->iomap || i915_vma_is_active(vma))
 			continue;
 
-		mutex_unlock(&i915->ggtt.vm.mutex);
-		if (i915_vma_unbind(vma) == 0)
+		if (__i915_vma_unbind(vma) == 0)
 			freed_pages += count;
-		mutex_lock(&i915->ggtt.vm.mutex);
 	}
 	mutex_unlock(&i915->ggtt.vm.mutex);
 
-	shrinker_unlock(i915, unlock);
-
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
 }
@@ -490,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
 
 	fs_reclaim_acquire(GFP_KERNEL);
 
-	/*
-	 * As we invariably rely on the struct_mutex within the shrinker,
-	 * but have a complicated recursion dance, taint all the mutexes used
-	 * within the shrinker with the struct_mutex. For completeness, we
-	 * taint with all subclass of struct_mutex, even though we should
-	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
-	 * deadlocks from using struct_mutex inside @mutex.
-	 */
-	mutex_acquire(&i915->drm.struct_mutex.dep_map,
-		      I915_MM_SHRINKER, 0, _RET_IP_);
-
 	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
 	mutex_release(&mutex->dep_map, 0, _RET_IP_);
 
-	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
-
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (unlock)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index e45eb8721850..fad98a921cde 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -624,8 +624,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
 			 &stolen_offset, &gtt_offset, &size);
 
@@ -677,21 +675,25 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
+	mutex_lock(&ggtt->vm.mutex);
 	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 				   size, gtt_offset, obj->cache_level,
 				   0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		mutex_unlock(&ggtt->vm.mutex);
 		goto err_pages;
 	}
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	GEM_BUG_ON(vma->pages);
 	vma->pages = obj->mm.pages;
+	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+
 	set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 	__i915_vma_set_map_and_fenceable(vma);
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_add_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index e5d1ae8d4dba..dc2a83ce44d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -181,22 +181,25 @@ static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
 			      int tiling_mode, unsigned int stride)
 {
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 	struct i915_vma *vma;
-	int ret;
+	int ret = 0;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return 0;
 
+	mutex_lock(&ggtt->vm.mutex);
 	for_each_ggtt_vma(vma, obj) {
 		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
 			continue;
 
-		ret = i915_vma_unbind(vma);
+		ret = __i915_vma_unbind(vma);
 		if (ret)
-			return ret;
+			break;
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 
 	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
 	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
-	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	if ((tiling | stride) == obj->tiling_and_stride)
 		return 0;
@@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	 * whilst executing a fenced command for an untiled object.
 	 */
 
-	err = i915_gem_object_fence_prepare(obj, tiling, stride);
-	if (err)
-		return err;
-
 	i915_gem_object_lock(obj);
 	if (i915_gem_object_is_framebuffer(obj)) {
 		i915_gem_object_unlock(obj);
 		return -EBUSY;
 	}
 
+	err = i915_gem_object_fence_prepare(obj, tiling, stride);
+	if (err) {
+		i915_gem_object_unlock(obj);
+		return err;
+	}
+
 	/* If the memory has unknown (i.e. varying) swizzling, we pin the
 	 * pages to prevent them being swapped out and causing corruption
 	 * due to the change in swizzling.
@@ -368,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	err = mutex_lock_interruptible(&dev->struct_mutex);
-	if (err)
-		goto err;
-
 	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
-	mutex_unlock(&dev->struct_mutex);
 
 	/* We have to maintain this existing ABI... */
 	args->stride = i915_gem_object_get_stride(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 6b3b50f0f6d9..1738a15eb911 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
 	struct interval_tree_node *it;
-	struct mutex *unlock = NULL;
 	unsigned long end;
 	int ret = 0;
 
@@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		}
 		spin_unlock(&mn->lock);
 
-		if (!unlock) {
-			unlock = &mn->mm->i915->drm.struct_mutex;
-
-			switch (mutex_trylock_recursive(unlock)) {
-			default:
-			case MUTEX_TRYLOCK_FAILED:
-				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
-					i915_gem_object_put(obj);
-					return -EINTR;
-				}
-				/* fall through */
-			case MUTEX_TRYLOCK_SUCCESS:
-				break;
-
-			case MUTEX_TRYLOCK_RECURSIVE:
-				unlock = ERR_PTR(-EEXIST);
-				break;
-			}
-		}
-
 		ret = i915_gem_object_unbind(obj,
 					     I915_GEM_OBJECT_UNBIND_ACTIVE);
 		if (ret == 0)
 			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
 		i915_gem_object_put(obj);
 		if (ret)
-			goto unlock;
+			return ret;
 
 		spin_lock(&mn->lock);
 
@@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	}
 	spin_unlock(&mn->lock);
 
-unlock:
-	if (!IS_ERR_OR_NULL(unlock))
-		mutex_unlock(unlock);
-
 	return ret;
 
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index c5cea4379216..98b2a6ccfcc1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -333,7 +333,12 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	unsigned int supported = INTEL_INFO(i915)->page_sizes;
 	struct drm_i915_gem_object *obj = vma->obj;
-	int err = 0;
+	int err;
+
+	/* We have to wait for the async bind to complete before our asserts */
+	err = i915_vma_sync(vma);
+	if (err)
+		return err;
 
 	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
 		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
@@ -1390,7 +1395,7 @@ static int igt_ppgtt_pin_update(void *arg)
 			goto out_unpin;
 		}
 
-		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL);
 		if (err)
 			goto out_unpin;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 0f4d0644a480..8eba0d3a31de 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -971,10 +971,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
-	i915_vma_put(batch);
-
+	i915_vma_unpin_and_release(&batch, 0);
 	i915_vma_unpin(vma);
 
 	*rq_out = i915_request_get(rq);
@@ -988,8 +985,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 err_request:
 	i915_request_add(rq);
 err_batch:
-	i915_vma_unpin(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 err_vma:
 	i915_vma_unpin(vma);
 
@@ -1533,9 +1529,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto skip_request;
 
-	i915_vma_unpin(vma);
-	i915_vma_close(vma);
-	i915_vma_put(vma);
+	i915_vma_unpin_and_release(&vma, 0);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index aefe557527f8..36aca1c172e7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -322,7 +322,6 @@ static int igt_partial_tiling(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (1) {
@@ -415,7 +414,6 @@ next_tiling: ;
 
 out_unlock:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
@@ -458,7 +456,6 @@ static int igt_smoke_tiling(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	count = 0;
@@ -508,7 +505,6 @@ static int igt_smoke_tiling(void *arg)
 	pr_info("%s: Completed %lu trials\n", __func__, count);
 
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index ee5dc13a30b3..6718da20f35d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -154,9 +154,7 @@ int igt_gpu_fill_dw(struct intel_context *ce,
 
 	i915_request_add(rq);
 
-	i915_vma_unpin(batch);
-	i915_vma_close(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 
 	return 0;
 
@@ -165,7 +163,6 @@ int igt_gpu_fill_dw(struct intel_context *ce,
 err_request:
 	i915_request_add(rq);
 err_batch:
-	i915_vma_unpin(batch);
-	i915_vma_put(batch);
+	i915_vma_unpin_and_release(&batch, 0);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 0e5909ee0657..7205595369be 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -302,11 +302,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		struct intel_uncore *uncore = gt->uncore;
+		unsigned long flags;
 
-		spin_lock_irq(&uncore->lock);
+		spin_lock_irqsave(&uncore->lock, flags);
 		intel_uncore_posting_read_fw(uncore,
 					     RING_HEAD(RENDER_RING_BASE));
-		spin_unlock_irq(&uncore->lock);
+		spin_unlock_irqrestore(&uncore->lock, flags);
 	}
 }
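
[Aside: the switch to the irqsave variant matters because
spin_unlock_irq() unconditionally re-enables interrupts, which is wrong
if the caller already ran with them disabled. For comparison:

unsigned long flags;

/* Only safe from process context with IRQs known to be enabled: */
spin_lock_irq(&uncore->lock);
spin_unlock_irq(&uncore->lock);		/* always re-enables IRQs */

/* Safe from any context; restores the caller's IRQ state: */
spin_lock_irqsave(&uncore->lock, flags);
spin_unlock_irqrestore(&uncore->lock, flags);
]
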
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 0747b8c9f768..ec32996254c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1338,15 +1338,13 @@ void intel_ring_free(struct kref *ref)
 {
 	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
 
-	i915_vma_close(ring->vma);
 	i915_vma_put(ring->vma);
-
 	kfree(ring);
 }
 
 static void __ring_context_fini(struct intel_context *ce)
 {
-	i915_gem_object_put(ce->state->obj);
+	i915_vma_put(ce->state);
 }
 
 static void ring_context_destroy(struct kref *ref)
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9c0c8441c22a..d3bee9f88008 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1127,15 +1127,14 @@ static int evict_vma(void *data)
 {
 	struct evict_vma *arg = data;
 	struct i915_address_space *vm = arg->vma->vm;
-	struct drm_i915_private *i915 = vm->i915;
 	struct drm_mm_node evict = arg->vma->node;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&vm->mutex);
 	err = i915_gem_evict_for_node(vm, &evict, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 
 	return err;
 }
@@ -1143,39 +1142,33 @@ static int evict_vma(void *data)
 static int evict_fence(void *data)
 {
 	struct evict_vma *arg = data;
-	struct drm_i915_private *i915 = arg->vma->vm->i915;
 	int err;
 
 	complete(&arg->completion);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/* Mark the fence register as dirty to force the mmio update. */
 	err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
 	if (err) {
 		pr_err("Invalid Y-tiling settings; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
 	if (err) {
 		pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	err = i915_vma_pin_fence(arg->vma);
 	i915_vma_unpin(arg->vma);
 	if (err) {
 		pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
-		goto out_unlock;
+		return err;
 	}
 
 	i915_vma_unpin_fence(arg->vma);
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return 0;
 }
 
 static int __igt_reset_evict_vma(struct intel_gt *gt,
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 5ff2437b2998..d996bbc7ea59 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 		flags = PIN_MAPPABLE;
 	}
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
 	mmio_hw_access_post(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
 			high_gm ? "high" : "low");
@@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu)
 
 	return 0;
 out_free_aperture:
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 	return ret;
 }
 
@@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&dev_priv->ggtt.vm.mutex);
 	drm_mm_remove_node(&vgpu->gm.low_gm_node);
 	drm_mm_remove_node(&vgpu->gm.high_gm_node);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&dev_priv->ggtt.vm.mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index d5aac6ff803a..0791736a08fd 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -146,6 +146,7 @@ __active_retire(struct i915_active *ref)
 	if (!retire)
 		return;
 
+	GEM_BUG_ON(rcu_access_pointer(ref->excl));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
 		GEM_BUG_ON(i915_active_request_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
@@ -245,6 +246,8 @@ void __i915_active_init(struct drm_i915_private *i915,
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = retire;
+
+	ref->excl = NULL;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
 	init_llist_head(&ref->preallocated_barriers);
@@ -341,6 +344,46 @@ int i915_active_ref(struct i915_active *ref,
 	return err;
 }
 
+static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
+
+	RCU_INIT_POINTER(ref->excl, NULL);
+	dma_fence_put(f);
+
+	active_retire(ref);
+}
+
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+{
+	/* We expect the caller to manage the exclusive timeline ordering */
+	GEM_BUG_ON(i915_active_is_idle(ref));
+
+	dma_fence_get(f);
+
+	rcu_read_lock();
+	if (rcu_access_pointer(ref->excl)) {
+		struct dma_fence *old;
+
+		old = dma_fence_get_rcu_safe(&ref->excl);
+		if (old) {
+			if (dma_fence_remove_callback(old, &ref->excl_cb))
+				atomic_dec(&ref->count);
+			dma_fence_put(old);
+		}
+	}
+	rcu_read_unlock();
+
+	atomic_inc(&ref->count);
+	rcu_assign_pointer(ref->excl, f);
+
+	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
+		RCU_INIT_POINTER(ref->excl, NULL);
+		atomic_dec(&ref->count);
+		dma_fence_put(f);
+	}
+}
+
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
@@ -399,6 +442,25 @@ void i915_active_ungrab(struct i915_active *ref)
 	__active_ungrab(ref);
 }
 
+static int excl_wait(struct i915_active *ref)
+{
+	struct dma_fence *old;
+	int err = 0;
+
+	if (!rcu_access_pointer(ref->excl))
+		return 0;
+
+	rcu_read_lock();
+	old = dma_fence_get_rcu_safe(&ref->excl);
+	rcu_read_unlock();
+	if (old) {
+		err = dma_fence_wait(old, true);
+		dma_fence_put(old);
+	}
+
+	return err;
+}
+
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
@@ -419,6 +481,10 @@ int i915_active_wait(struct i915_active *ref)
 		return 0;
 	}
 
+	err = excl_wait(ref);
+	if (err)
+		goto out;
+
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
 			err = -EBUSY;
@@ -430,6 +496,7 @@ int i915_active_wait(struct i915_active *ref)
 			break;
 	}
 
+out:
 	__active_retire(ref);
 	if (err)
 		return err;
@@ -454,26 +521,22 @@ int i915_request_await_active_request(struct i915_request *rq,
 
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
-	struct active_node *it, *n;
-	int err;
-
-	if (RB_EMPTY_ROOT(&ref->tree))
-		return 0;
+	int err = 0;
 
-	/* await allocates and so we need to avoid hitting the shrinker */
-	err = i915_active_acquire(ref);
-	if (err)
-		return err;
+	if (rcu_access_pointer(ref->excl)) {
+		struct dma_fence *fence;
 
-	mutex_lock(&ref->mutex);
-	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		err = i915_request_await_active_request(rq, &it->base);
-		if (err)
-			break;
+		rcu_read_lock();
+		fence = dma_fence_get_rcu_safe(&ref->excl);
+		rcu_read_unlock();
+		if (fence) {
+			err = i915_request_await_dma_fence(rq, fence);
+			dma_fence_put(fence);
+		}
 	}
-	mutex_unlock(&ref->mutex);
 
-	i915_active_release(ref);
+	/* In the future we may choose to await on all fences */
+
 	return err;
 }
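
[Aside: the new exclusive slot is a single RCU-protected dma_fence
pointer that counts against ref->count until its callback fires. The
reader side used by excl_wait() and i915_request_await_active() boils
down to a lock-free snapshot; as a standalone sketch:

/* Snapshot the exclusive fence without any lock; the slot may be
 * cleared concurrently by excl_cb() once the fence signals. */
static struct dma_fence *snapshot_excl(struct i915_active *ref)
{
	struct dma_fence *f;

	rcu_read_lock();
	f = dma_fence_get_rcu_safe(&ref->excl); /* NULL if none/cleared */
	rcu_read_unlock();

	return f; /* caller must dma_fence_put() when done */
}
]
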
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 949c6835335b..90034f61b7c2 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -379,6 +379,13 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	return i915_active_ref(ref, i915_request_timeline(rq), rq);
 }
 
+void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+
+static inline bool i915_active_has_exclusive(struct i915_active *ref)
+{
+	return rcu_access_pointer(ref->excl);
+}
+
 int i915_active_wait(struct i915_active *ref);
 
 int i915_request_await_active(struct i915_request *rq,
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 1854e7d168c1..86e7a232ea3c 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -8,6 +8,7 @@
 #define _I915_ACTIVE_TYPES_H_
 
 #include <linux/atomic.h>
+#include <linux/dma-fence.h>
 #include <linux/llist.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
@@ -51,6 +52,10 @@ struct i915_active {
 	struct mutex mutex;
 	atomic_t count;
 
+	/* Preallocated "exclusive" node */
+	struct dma_fence __rcu *excl;
+	struct dma_fence_cb excl_cb;
+
 	unsigned long flags;
 #define I915_ACTIVE_GRAB_BIT 0
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3306c6bb515a..5323e4fa55d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1858,10 +1858,8 @@ static int i915_drm_resume(struct drm_device *dev)
 	if (ret)
 		DRM_ERROR("failed to re-enable GGTT\n");
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_restore_gtt_mappings(dev_priv);
 	i915_gem_restore_fences(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_csr_ucode_resume(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7046067f70c1..f50058cf8ab8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -62,20 +62,31 @@
 #include "intel_pm.h"
 
 static int
-insert_mappable_node(struct i915_ggtt *ggtt,
-                     struct drm_mm_node *node, u32 size)
+insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
 {
+	int err;
+
+	err = mutex_lock_interruptible(&ggtt->vm.mutex);
+	if (err)
+		return err;
+
 	memset(node, 0, sizeof(*node));
-	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
-					   size, 0, I915_COLOR_UNEVICTABLE,
-					   0, ggtt->mappable_end,
-					   DRM_MM_INSERT_LOW);
+	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
+					  size, 0, I915_COLOR_UNEVICTABLE,
+					  0, ggtt->mappable_end,
+					  DRM_MM_INSERT_LOW);
+
+	mutex_unlock(&ggtt->vm.mutex);
+
+	return err;
 }
 
 static void
-remove_mappable_node(struct drm_mm_node *node)
+remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
 {
+	mutex_lock(&ggtt->vm.mutex);
 	drm_mm_remove_node(node);
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 int
@@ -87,7 +98,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	struct i915_vma *vma;
 	u64 pinned;
 
-	mutex_lock(&ggtt->vm.mutex);
+	if (mutex_lock_interruptible(&ggtt->vm.mutex))
+		return -EINTR;
 
 	pinned = ggtt->vm.reserved;
 	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
@@ -109,20 +121,24 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 	LIST_HEAD(still_in_list);
 	int ret = 0;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
 						       obj_link))) {
+		struct i915_address_space *vm = vma->vm;
+
+		ret = -EBUSY;
+		if (!i915_vm_tryopen(vm))
+			break;
+
 		list_move_tail(&vma->obj_link, &still_in_list);
 		spin_unlock(&obj->vma.lock);
 
-		ret = -EBUSY;
 		if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
 		    !i915_vma_is_active(vma))
 			ret = i915_vma_unbind(vma);
 
+		i915_vm_close(vm);
 		spin_lock(&obj->vma.lock);
 	}
 	list_splice(&still_in_list, &obj->vma.list);
@@ -338,10 +354,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	u64 remain, offset;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	vma = ERR_PTR(-ENODEV);
 	if (!i915_gem_object_is_tiled(obj))
@@ -355,12 +367,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	} else {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
-			goto out_unlock;
+			goto out_rpm;
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -414,17 +424,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
 	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
-out_unlock:
+out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return ret;
 }
 
@@ -531,10 +538,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	void __user *user_data;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	if (i915_gem_object_has_struct_page(obj)) {
 		/*
 		 * Avoid waking the device up if we can fallback, as
@@ -544,10 +547,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * using the cache bypass of indirect GGTT access.
 		 */
 		wakeref = intel_runtime_pm_get_if_in_use(rpm);
-		if (!wakeref) {
-			ret = -EFAULT;
-			goto out_unlock;
-		}
+		if (!wakeref)
+			return -EFAULT;
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
 		wakeref = intel_runtime_pm_get(rpm);
@@ -569,8 +570,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!drm_mm_node_allocated(&node));
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
@@ -634,18 +633,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
-	mutex_lock(&i915->drm.struct_mutex);
 	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 	if (drm_mm_node_allocated(&node)) {
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
-		remove_mappable_node(&node);
+		remove_mappable_node(ggtt, &node);
 	} else {
 		i915_vma_unpin(vma);
 	}
 out_rpm:
 	intel_runtime_pm_put(rpm, wakeref);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
 }
 
@@ -968,8 +964,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (i915_gem_object_never_bind_ggtt(obj))
 		return ERR_PTR(-ENODEV);
 
@@ -1019,13 +1013,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 				return ERR_PTR(-ENOSPC);
 		}
 
-		WARN(i915_vma_is_pinned(vma),
-		     "bo is already pinned in ggtt with incorrect alignment:"
-		     " offset=%08x, req.alignment=%llx,"
-		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
-		     i915_ggtt_offset(vma), alignment,
-		     !!(flags & PIN_MAPPABLE),
-		     i915_vma_is_map_and_fenceable(vma));
 		ret = i915_vma_unbind(vma);
 		if (ret)
 			return ERR_PTR(ret);
@@ -1444,8 +1431,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (ret == -EIO) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
-
 		/*
 		 * Allow engines or uC initialisation to fail by marking the GPU
 		 * as wedged. But we only want to do this when the GPU is angry,
@@ -1462,8 +1447,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		i915_gem_restore_gtt_mappings(dev_priv);
 		i915_gem_restore_fences(dev_priv);
 		intel_init_clock_gating(dev_priv);
-
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
 	i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 8c1e04f402bc..0552bf93eea3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -47,8 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * bound by their active reference.
 	 */
 	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE |
-				      I915_WAIT_LOCKED,
+				      I915_WAIT_INTERRUPTIBLE,
 				      MAX_SCHEDULE_TIMEOUT);
 }
 
@@ -104,7 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct i915_vma *active;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
@@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
-	/*
-	 * Retire before we search the active list. Although we have
-	 * reasonable accuracy in our retirement lists, we may have
-	 * a stray pin (preventing eviction) that can only be resolved by
-	 * retiring.
-	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(dev_priv);
-
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -235,12 +225,12 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 
 	while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
 		vma = container_of(node, struct i915_vma, node);
-		ret = i915_vma_unbind(vma);
+		ret = __i915_vma_unbind(vma);
 	}
 
 	return ret;
@@ -268,7 +258,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	struct i915_vma *vma, *next;
 	int ret = 0;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
 
@@ -349,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 
 	return ret;
@@ -373,7 +363,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	struct i915_vma *vma, *next;
 	int ret;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
 	trace_i915_gem_evict_vm(vm);
 
 	/* Switch back to the default context in order to unpin
@@ -388,7 +378,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
-	mutex_lock(&vm->mutex);
 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
 		if (i915_vma_is_pinned(vma))
 			continue;
@@ -396,13 +385,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 		__i915_vma_pin(vma);
 		list_add(&vma->evict_link, &eviction_list);
 	}
-	mutex_unlock(&vm->mutex);
 
 	ret = 0;
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		__i915_vma_unpin(vma);
 		if (ret == 0)
-			ret = i915_vma_unbind(vma);
+			ret = __i915_vma_unbind(vma);
 	}
 	return ret;
 }
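
[Aside: eviction now runs entirely under vm->mutex and therefore calls
__i915_vma_unbind(), the already-locked variant. The assumed pairing
elsewhere in this series (a sketch, not the literal implementation):

/* Plain variant: take the vm mutex, then defer to the locked one. */
int i915_vma_unbind(struct i915_vma *vma)
{
	int err;

	err = mutex_lock_interruptible(&vma->vm->mutex);
	if (err)
		return err;

	err = __i915_vma_unbind(vma); /* asserts vm->mutex is held */
	mutex_unlock(&vma->vm->mutex);

	return err;
}
]
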
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 615a9f4ef30c..487b7261f7ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -230,14 +230,15 @@ static int fence_update(struct i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
 			return -EINVAL;
 
-		ret = i915_active_wait(&vma->active);
+		ret = i915_vma_sync(vma);
 		if (ret)
 			return ret;
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
-		ret = i915_active_wait(&old->active);
+		/* XXX Ideally we would move the waiting to outside the mutex */
+		ret = i915_vma_sync(old);
 		if (ret) {
 			fence->vma = old;
 			return ret;
@@ -331,13 +332,15 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
 	return ERR_PTR(-EDEADLK);
 }
 
-static int __i915_vma_pin_fence(struct i915_vma *vma)
+int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
+	lockdep_assert_held(&vma->vm->mutex);
+
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
 		fence = vma->fence;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8eba63ecdb03..55cebf256d03 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -150,16 +150,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
 
 static int ppgtt_bind_vma(struct i915_vma *vma,
 			  enum i915_cache_level cache_level,
-			  u32 unused)
+			  u32 flags)
 {
 	u32 pte_flags;
 	int err;
 
-	if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+	if (flags & I915_VMA_ALLOC) {
 		err = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start, vma->size);
 		if (err)
 			return err;
+
+		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 	}
 
 	/* Applicable to VLV, and gen8+ */
@@ -167,6 +169,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
+	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 	wmb();
 
@@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
 static int ppgtt_set_pages(struct i915_vma *vma)
@@ -503,15 +507,26 @@ static void i915_address_space_fini(struct i915_address_space *vm)
 	mutex_destroy(&vm->mutex);
 }
 
-static void ppgtt_destroy_vma(struct i915_address_space *vm)
+void __i915_vm_close(struct i915_address_space *vm)
 {
 	struct i915_vma *vma, *vn;
 
-	mutex_lock(&vm->i915->drm.struct_mutex);
-	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link)
+	mutex_lock(&vm->mutex);
+	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
 		i915_vma_destroy(vma);
+
+		i915_gem_object_put(obj);
+	}
 	GEM_BUG_ON(!list_empty(&vm->bound_list));
-	mutex_unlock(&vm->i915->drm.struct_mutex);
+	mutex_unlock(&vm->mutex);
 }
 
 static void __i915_vm_release(struct work_struct *work)
@@ -519,8 +534,6 @@ static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, rcu.work);
 
-	ppgtt_destroy_vma(vm);
-
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -535,7 +548,6 @@ void i915_vm_release(struct kref *kref)
 	GEM_BUG_ON(i915_is_ggtt(vm));
 	trace_i915_ppgtt_release(vm);
 
-	vm->closed = true;
 	queue_rcu_work(vm->i915->wq, &vm->rcu);
 }
 
@@ -543,6 +555,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 {
 	kref_init(&vm->ref);
 	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+	atomic_set(&vm->open, 1);
 
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
@@ -1771,12 +1784,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct drm_i915_private *i915 = vm->i915;
 
-	/* FIXME remove the struct_mutex to bring the locking under control */
-	mutex_lock(&i915->drm.struct_mutex);
 	i915_vma_destroy(ppgtt->vma);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	gen6_ppgtt_free_pd(ppgtt);
 	free_scratch(vm);
@@ -1865,7 +1874,8 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 
 	i915_active_init(i915, &vma->active, NULL, NULL);
 
-	vma->vm = &ggtt->vm;
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(&ggtt->vm);
 	vma->ops = &pd_vma_ops;
 	vma->private = ppgtt;
 
@@ -1885,7 +1895,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err = 0;
 
-	GEM_BUG_ON(ppgtt->base.vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
 
 	/*
 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
@@ -2463,14 +2473,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
 
-		if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+		if (flags & I915_VMA_ALLOC) {
 			ret = alias->vm.allocate_va_range(&alias->vm,
 							  vma->node.start,
 							  vma->size);
 			if (ret)
 				return ret;
+
+			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
 		}
 
+		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+				     __i915_vma_flags(vma)));
 		alias->vm.insert_entries(&alias->vm, vma,
 					 cache_level, pte_flags);
 	}
@@ -2499,7 +2513,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 			vm->clear_range(vm, vma->node.start, vma->size);
 	}
 
-	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) {
+	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
 		struct i915_address_space *vm =
 			&i915_vm_to_ggtt(vma->vm)->alias->vm;
 
@@ -2602,22 +2616,16 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
 
 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_ppgtt *ppgtt;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ppgtt = fetch_and_zero(&ggtt->alias);
 	if (!ppgtt)
-		goto out;
+		return;
 
 	i915_vm_put(&ppgtt->vm);
 
 	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
-
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
@@ -2734,32 +2742,28 @@ int i915_init_ggtt(struct drm_i915_private *i915)
 
 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 {
-	struct drm_i915_private *i915 = ggtt->vm.i915;
 	struct i915_vma *vma, *vn;
 
-	ggtt->vm.closed = true;
+	atomic_set(&ggtt->vm.open, 0);
 
 	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
-	flush_workqueue(i915->wq);
+	flush_workqueue(ggtt->vm.i915->wq);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&ggtt->vm.mutex);
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
-		WARN_ON(i915_vma_unbind(vma));
+		WARN_ON(__i915_vma_unbind(vma));
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
 
 	ggtt_release_guc_top(ggtt);
-
-	if (drm_mm_initialized(&ggtt->vm.mm)) {
-		intel_vgt_deballoon(ggtt);
-		i915_address_space_fini(&ggtt->vm);
-	}
+	intel_vgt_deballoon(ggtt);
 
 	ggtt->vm.cleanup(&ggtt->vm);
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&ggtt->vm.mutex);
+	i915_address_space_fini(&ggtt->vm);
 
 	arch_phys_wc_del(ggtt->mtrr);
 	io_mapping_fini(&ggtt->iomap);
@@ -3188,9 +3192,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 static int ggtt_init_hw(struct i915_ggtt *ggtt)
 {
 	struct drm_i915_private *i915 = ggtt->vm.i915;
-	int ret = 0;
-
-	mutex_lock(&i915->drm.struct_mutex);
 
 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
 
@@ -3206,18 +3207,14 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt)
 				ggtt->gmadr.start,
 				ggtt->mappable_end)) {
 		ggtt->vm.cleanup(&ggtt->vm);
-		ret = -EIO;
-		goto out;
+		return -EIO;
 	}
 
 	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
 
 	i915_ggtt_init_fences(ggtt);
 
-out:
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
+	return 0;
 }
 
 /**
@@ -3289,6 +3286,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 {
 	struct i915_vma *vma, *vn;
 	bool flush = false;
+	int open;
 
 	intel_gt_check_and_clear_faults(ggtt->vm.gt);
 
@@ -3296,7 +3294,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 
 	/* First fill our portion of the GTT with scratch pages */
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-	ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
+
+	/* Skip rewriting PTE on VMA unbind. */
+	open = atomic_xchg(&ggtt->vm.open, 0);
 
 	/* clflush objects bound into the GGTT and rebind them. */
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
@@ -3305,24 +3305,20 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 			continue;
 
-		mutex_unlock(&ggtt->vm.mutex);
-
-		if (!i915_vma_unbind(vma))
-			goto lock;
+		if (!__i915_vma_unbind(vma))
+			continue;
 
+		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
-				      PIN_UPDATE));
+				      PIN_GLOBAL, NULL));
 		if (obj) { /* only used during resume => exclusive access */
 			flush |= fetch_and_zero(&obj->write_domain);
 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
 		}
-
-lock:
-		mutex_lock(&ggtt->vm.mutex);
 	}
 
-	ggtt->vm.closed = false;
+	atomic_set(&ggtt->vm.open, open);
 	ggtt->invalidate(ggtt);
 
 	mutex_unlock(&ggtt->vm.mutex);
@@ -3714,7 +3710,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	u64 offset;
 	int err;
 
-	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vm->mutex);
+
 	GEM_BUG_ON(!size);
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3502b9c85a8e..0a18fdfe63ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -307,7 +307,14 @@ struct i915_address_space {
 
 	unsigned int bind_async_flags;
 
-	bool closed;
+	/*
+	 * Each active user context has its own address space (in full-ppgtt).
+	 * Since the vm may be shared between multiple contexts, we count how
+	 * many contexts keep us "open". Once open hits zero, we are closed
+	 * and do not allow any new attachments, and proceed to shut down
+	 * our vmas and page directories.
+	 */
+	atomic_t open;
 
 	struct mutex mutex; /* protects vma and our lists */
 #define VM_CLASS_GGTT 0
@@ -581,6 +588,35 @@ static inline void i915_vm_put(struct i915_address_space *vm)
 	kref_put(&vm->ref, i915_vm_release);
 }
 
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	atomic_inc(&vm->open);
+	return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+	if (atomic_add_unless(&vm->open, 1, 0))
+		return i915_vm_get(vm);
+
+	return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!atomic_read(&vm->open));
+	if (atomic_dec_and_test(&vm->open))
+		__i915_vm_close(vm);
+
+	i915_vm_put(vm);
+}
+
 int gen6_ppgtt_pin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin(struct i915_ppgtt *base);
 void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
@@ -613,10 +649,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
 
-#define PIN_MBZ			BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT_ULL(10) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT_ULL(11)
+#define PIN_UPDATE		BIT_ULL(9)
+#define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
 
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
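
[Aside: the open count is deliberately separate from the kref: the kref
keeps the struct alive, while open counts users that may still attach
vmas; i915_gem_object_unbind() above uses i915_vm_tryopen() so it never
races the final close. A sketch of a context attaching to and detaching
from a shared vm (the wrapper names are hypothetical):

static struct i915_address_space *
context_attach_vm(struct i915_address_space *vm)
{
	return i915_vm_open(vm);	/* open++ and ref++ */
}

static void context_detach_vm(struct i915_address_space *vm)
{
	/* open--; the last closer tears down bound vmas via
	 * __i915_vm_close() before dropping its reference. */
	i915_vm_close(vm);
}
]
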
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 524f6710b7aa..80055501eccb 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1204,15 +1204,10 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 {
 	struct i915_gem_engines_iter it;
-	struct drm_i915_private *i915 = stream->dev_priv;
 	struct i915_gem_context *ctx = stream->ctx;
 	struct intel_context *ce;
 	int err;
 
-	err = i915_mutex_lock_interruptible(&i915->drm);
-	if (err)
-		return ERR_PTR(err);
-
 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		if (ce->engine->class != RENDER_CLASS)
 			continue;
@@ -1229,10 +1224,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 	}
 	i915_gem_context_unlock_engines(ctx);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err)
-		return ERR_PTR(err);
-
 	return stream->pinned_ctx;
 }
 
@@ -1331,32 +1322,22 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
 	struct intel_context *ce;
 
 	stream->specific_ctx_id = INVALID_CTX_ID;
 	stream->specific_ctx_id_mask = 0;
 
 	ce = fetch_and_zero(&stream->pinned_ctx);
-	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
+	if (ce)
 		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
-	}
 }
 
 static void
 free_oa_buffer(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *i915 = stream->dev_priv;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
 	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
 				   I915_VMA_RELEASE_MAP);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	stream->oa_buffer.vaddr = NULL;
 }
 
@@ -1511,18 +1492,13 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	if (WARN_ON(stream->oa_buffer.vma))
 		return -ENODEV;
 
-	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-	if (ret)
-		return ret;
-
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
 	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
-		ret = PTR_ERR(bo);
-		goto unlock;
+		return PTR_ERR(bo);
 	}
 
 	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
@@ -1546,7 +1522,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 			 i915_ggtt_offset(stream->oa_buffer.vma),
 			 stream->oa_buffer.vaddr);
 
-	goto unlock;
+	return 0;
 
 err_unpin:
 	__i915_vma_unpin(vma);
@@ -1557,8 +1533,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 	stream->oa_buffer.vma = NULL;
 
-unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d097f77890ba..fe91a0e47b88 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -32,6 +32,7 @@
 
 #include "i915_drv.h"
 #include "i915_globals.h"
+#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_vma.h"
 
@@ -110,7 +111,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vma->vm = vm;
+	mutex_init(&vma->pages_mutex);
+	vma->vm = i915_vm_get(vm);
 	vma->ops = &vm->vma_ops;
 	vma->obj = obj;
 	vma->resv = obj->base.resv;
@@ -261,8 +263,6 @@ vma_lookup(struct drm_i915_gem_object *obj,
  * Once created, the VMA is kept until either the object is freed, or the
  * address space is closed.
  *
- * Must be called with struct_mutex held.
- *
  * Returns the vma, or an error pointer.
  */
 struct i915_vma *
@@ -273,7 +273,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
-	GEM_BUG_ON(vm->closed);
+	GEM_BUG_ON(!atomic_read(&vm->open));
 
 	spin_lock(&obj->vma.lock);
 	vma = vma_lookup(obj, vm, view);
@@ -287,18 +287,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
+struct i915_vma_work {
+	struct dma_fence_work base;
+	struct i915_vma *vma;
+	enum i915_cache_level cache_level;
+	unsigned int flags;
+};
+
+static int __vma_bind(struct dma_fence_work *work)
+{
+	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
+	struct i915_vma *vma = vw->vma;
+	int err;
+
+	err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags);
+	if (err)
+		atomic_or(I915_VMA_ERROR, &vma->flags);
+
+	if (vma->obj)
+		__i915_gem_object_unpin_pages(vma->obj);
+
+	return err;
+}
+
+static const struct dma_fence_work_ops bind_ops = {
+	.name = "bind",
+	.work = __vma_bind,
+};
+
+struct i915_vma_work *i915_vma_work(void)
+{
+	struct i915_vma_work *vw;
+
+	vw = kzalloc(sizeof(*vw), GFP_KERNEL);
+	if (!vw)
+		return NULL;
+
+	dma_fence_work_init(&vw->base, &bind_ops);
+	vw->base.dma.error = -EAGAIN; /* disable the worker by default */
+
+	return vw;
+}
+
 /**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
  * @vma: VMA to map
  * @cache_level: mapping cache level
  * @flags: flags like global or local mapping
+ * @work: preallocated worker for allocating and binding the PTE
  *
  * DMA addresses are taken from the scatter-gather table of this object (or of
  * this VMA in case of non-default GGTT views) and PTE entries set up.
  * Note that DMA addresses are also the only part of the SG table we care about.
  */
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags)
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work)
 {
 	u32 bind_flags;
 	u32 vma_flags;
@@ -315,11 +360,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	if (GEM_DEBUG_WARN_ON(!flags))
 		return -EINVAL;
 
-	bind_flags = 0;
-	if (flags & PIN_GLOBAL)
-		bind_flags |= I915_VMA_GLOBAL_BIND;
-	if (flags & PIN_USER)
-		bind_flags |= I915_VMA_LOCAL_BIND;
+	bind_flags = flags;
+	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 
 	vma_flags = atomic_read(&vma->flags);
 	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
@@ -333,9 +375,32 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 	GEM_BUG_ON(!vma->pages);
 
 	trace_i915_vma_bind(vma, bind_flags);
-	ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
-	if (ret)
-		return ret;
+	if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+		work->vma = vma;
+		work->cache_level = cache_level;
+		work->flags = bind_flags | I915_VMA_ALLOC;
+
+		/*
+		 * Note we only want to chain up to the migration fence on
+		 * the pages (not the object itself). As we don't track that
+		 * yet, we have to use the exclusive fence instead.
+		 *
+		 * Also note that we do not want to track the async vma as
+		 * part of the obj->resv->excl_fence as it only affects
+		 * execution and not content or object's backing store lifetime.
+		 */
+		GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
+		i915_active_set_exclusive(&vma->active, &work->base.dma);
+		work->base.dma.error = 0; /* enable the queue_work() */
+
+		if (vma->obj)
+			__i915_gem_object_pin_pages(vma->obj);
+	} else {
+		GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags);
+		ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
+		if (ret)
+			return ret;
+	}
 
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
@@ -348,9 +413,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	/* Access through the GTT requires the device to be awake. */
 	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
-
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
+	if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
 		err = -ENODEV;
 		goto err;
 	}
@@ -358,7 +421,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 
-	ptr = vma->iomap;
+	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
 		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
 					vma->node.start,
@@ -368,7 +431,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 			goto err;
 		}
 
-		vma->iomap = ptr;
+		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
+			io_mapping_unmap(ptr);
+			ptr = vma->iomap;
+		}
 	}
 
 	__i915_vma_pin(vma);
@@ -388,18 +454,12 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 void i915_vma_flush_writes(struct i915_vma *vma)
 {
-	if (!i915_vma_has_ggtt_write(vma))
-		return;
-
-	intel_gt_flush_ggtt_writes(vma->vm->gt);
-
-	i915_vma_unset_ggtt_write(vma);
+	if (i915_vma_unset_ggtt_write(vma))
+		intel_gt_flush_ggtt_writes(vma->vm->gt);
 }
 
 void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
 	GEM_BUG_ON(vma->iomap == NULL);
 
 	i915_vma_flush_writes(vma);
@@ -435,6 +495,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
+	if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma)))
+		return true;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -535,7 +598,6 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj)
 static int
 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = vma->vm->i915;
 	unsigned long color;
 	u64 start, end;
 	int ret;
@@ -561,7 +623,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	end = vma->vm->total;
 	if (flags & PIN_MAPPABLE)
-		end = min_t(u64, end, dev_priv->ggtt.mappable_end);
+		end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end);
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
 	GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
@@ -578,34 +640,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	}
 
 	color = 0;
-	if (vma->obj) {
-		ret = i915_gem_object_pin_pages(vma->obj);
-		if (ret)
-			return ret;
-
-		if (i915_vm_has_cache_coloring(vma->vm))
-			color = vma->obj->cache_level;
-	}
-
-	GEM_BUG_ON(vma->pages);
-
-	ret = vma->ops->set_pages(vma);
-	if (ret)
-		goto err_unpin;
+	if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
+		color = vma->obj->cache_level;
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
 		if (!IS_ALIGNED(offset, alignment) ||
-		    range_overflows(offset, size, end)) {
-			ret = -EINVAL;
-			goto err_clear;
-		}
+		    range_overflows(offset, size, end))
+			return -EINVAL;
 
 		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
 					   size, offset, color,
 					   flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 	} else {
 		/*
 		 * We only support huge gtt pages through the 48b PPGTT,
@@ -644,7 +692,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 					  size, alignment, color,
 					  start, end, flags);
 		if (ret)
-			goto err_clear;
+			return ret;
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);
@@ -652,23 +700,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
 
-	mutex_lock(&vma->vm->mutex);
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
-	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
+		atomic_inc(&vma->obj->mm.pages_pin_count);
 		atomic_inc(&vma->obj->bind_count);
 		assert_bind_count(vma->obj);
 	}
 
 	return 0;
-
-err_clear:
-	vma->ops->clear_pages(vma);
-err_unpin:
-	if (vma->obj)
-		i915_gem_object_unpin_pages(vma->obj);
-	return ret;
 }
 
 static void
@@ -677,12 +717,7 @@ i915_vma_remove(struct i915_vma *vma)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
-	vma->ops->clear_pages(vma);
-
-	mutex_lock(&vma->vm->mutex);
-	drm_mm_remove_node(&vma->node);
 	list_del(&vma->vm_link);
-	mutex_unlock(&vma->vm->mutex);
 
 	/*
 	 * Since the unbound list is global, only move to that list if
@@ -701,51 +736,211 @@ i915_vma_remove(struct i915_vma *vma)
 		i915_gem_object_unpin_pages(obj);
 		assert_bind_count(obj);
 	}
+
+	drm_mm_remove_node(&vma->node);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags)
+static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 {
-	const unsigned int bound = atomic_read(&vma->flags);
-	int ret;
+	unsigned int bound;
+	bool pinned = true;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
-	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
+	bound = atomic_read(&vma->flags);
+	do {
+		if (unlikely(flags & ~bound))
+			return false;
 
-	if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
-		ret = -EBUSY;
-		goto err_unpin;
+		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
+			return false;
+
+		if (!(bound & I915_VMA_PIN_MASK))
+			goto unpinned;
+
+		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
+	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
+
+	return true;
+
+unpinned:
+	/*
+	 * If pin_count==0, but we are bound, check under the lock to avoid
+	 * racing with a concurrent i915_vma_unbind().
+	 */
+	mutex_lock(&vma->vm->mutex);
+	do {
+		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
+			pinned = false;
+			break;
+		}
+
+		if (unlikely(flags & ~bound)) {
+			pinned = false;
+			break;
+		}
+	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
+	mutex_unlock(&vma->vm->mutex);
+
+	return pinned;
+}
+
+static int vma_get_pages(struct i915_vma *vma)
+{
+	int err = 0;
+
+	if (atomic_add_unless(&vma->pages_count, 1, 0))
+		return 0;
+
+	/* Allocations ahoy! */
+	if (mutex_lock_interruptible(&vma->pages_mutex))
+		return -EINTR;
+
+	if (!atomic_read(&vma->pages_count)) {
+		if (vma->obj) {
+			err = i915_gem_object_pin_pages(vma->obj);
+			if (err)
+				goto unlock;
+		}
+
+		err = vma->ops->set_pages(vma);
+		if (err)
+			goto unlock;
 	}
+	atomic_inc(&vma->pages_count);
 
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
-		ret = i915_vma_insert(vma, size, alignment, flags);
-		if (ret)
-			goto err_unpin;
+unlock:
+	mutex_unlock(&vma->pages_mutex);
+
+	return err;
+}
+
+static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
+{
+	/* We allocate under vma_get_pages, so beware the shrinker */
+	mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
+	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
+	if (atomic_sub_return(count, &vma->pages_count) == 0) {
+		vma->ops->clear_pages(vma);
+		GEM_BUG_ON(vma->pages);
+		if (vma->obj)
+			i915_gem_object_unpin_pages(vma->obj);
 	}
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	mutex_unlock(&vma->pages_mutex);
+}
 
-	ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags);
-	if (ret)
-		goto err_remove;
+static void vma_put_pages(struct i915_vma *vma)
+{
+	if (atomic_add_unless(&vma->pages_count, -1, 1))
+		return;
+
+	__vma_put_pages(vma, 1);
+}
+
+static void vma_unbind_pages(struct i915_vma *vma)
+{
+	unsigned int count;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	/* The upper portion of pages_count is the number of bindings */
+	count = atomic_read(&vma->pages_count);
+	count >>= I915_VMA_PAGES_BIAS;
+	GEM_BUG_ON(!count);
+
+	__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
+}
+
+int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+{
+	struct i915_vma_work *work = NULL;
+	unsigned int bound;
+	int err;
+
+	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
+	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
+
+	GEM_BUG_ON(flags & PIN_UPDATE);
+	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
+
+	/* First try and grab the pin without rebinding the vma */
+	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
+		return 0;
+
+	err = vma_get_pages(vma);
+	if (err)
+		return err;
+
+	if (flags & vma->vm->bind_async_flags) {
+		work = i915_vma_work();
+		if (!work) {
+			err = -ENOMEM;
+			goto err_pages;
+		}
+	}
+
+	/* No more allocations allowed once we hold vm->mutex */
+	err = mutex_lock_interruptible(&vma->vm->mutex);
+	if (err)
+		goto err_fence;
+
+	bound = atomic_read(&vma->flags);
+	if (unlikely(bound & I915_VMA_ERROR)) {
+		err = -ENOMEM;
+		goto err_unlock;
+	}
+
+	if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
+		err = -EAGAIN; /* pins are meant to be fairly temporary */
+		goto err_unlock;
+	}
+
+	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
+		__i915_vma_pin(vma);
+		goto err_unlock;
+	}
+
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unlock;
+
+	if (!(bound & I915_VMA_BIND_MASK)) {
+		err = i915_vma_insert(vma, size, alignment, flags);
+		if (err)
+			goto err_active;
+
+		if (i915_is_ggtt(vma->vm))
+			__i915_vma_set_map_and_fenceable(vma);
+	}
 
-	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
+	GEM_BUG_ON(!vma->pages);
+	err = i915_vma_bind(vma,
+			    vma->obj ? vma->obj->cache_level : 0,
+			    flags, work);
+	if (err)
+		goto err_remove;
 
-	if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+	/* There should be at most 2 active bindings (user, global) */
+	GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
+	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
+	__i915_vma_pin(vma);
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-	return 0;
 
 err_remove:
-	if ((bound & I915_VMA_BIND_MASK) == 0) {
+	if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
 		i915_vma_remove(vma);
-		GEM_BUG_ON(vma->pages);
-		GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK);
-	}
-err_unpin:
-	__i915_vma_unpin(vma);
-	return ret;
+err_active:
+	i915_active_release(&vma->active);
+err_unlock:
+	mutex_unlock(&vma->vm->mutex);
+err_fence:
+	if (work)
+		dma_fence_work_commit(&work->base);
+err_pages:
+	vma_put_pages(vma);
+	return err;
 }
 
 void i915_vma_close(struct i915_vma *vma)
@@ -776,9 +971,6 @@ static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (!i915_vma_is_closed(vma))
-		return;
-
 	spin_lock_irq(&i915->gt.closed_lock);
 	list_del_init(&vma->closed_link);
 	spin_unlock_irq(&i915->gt.closed_lock);
@@ -786,40 +978,35 @@ static void __i915_vma_remove_closed(struct i915_vma *vma)
 
 void i915_vma_reopen(struct i915_vma *vma)
 {
-	__i915_vma_remove_closed(vma);
+	if (i915_vma_is_closed(vma))
+		__i915_vma_remove_closed(vma);
 }
 
-static void __i915_vma_destroy(struct i915_vma *vma)
+void i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
-	GEM_BUG_ON(vma->fence);
+	if (drm_mm_node_allocated(&vma->node)) {
+		mutex_lock(&vma->vm->mutex);
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+		WARN_ON(__i915_vma_unbind(vma));
+		mutex_unlock(&vma->vm->mutex);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
+	}
+	GEM_BUG_ON(i915_vma_is_active(vma));
 
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		spin_lock(&obj->vma.lock);
 		list_del(&vma->obj_link);
-		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
+		rb_erase(&vma->obj_node, &obj->vma.tree);
 		spin_unlock(&obj->vma.lock);
 	}
 
-	i915_active_fini(&vma->active);
-
-	i915_vma_free(vma);
-}
-
-void i915_vma_destroy(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	GEM_BUG_ON(i915_vma_is_pinned(vma));
-
 	__i915_vma_remove_closed(vma);
+	i915_vm_put(vma->vm);
 
-	WARN_ON(i915_vma_unbind(vma));
-	GEM_BUG_ON(i915_vma_is_active(vma));
-
-	__i915_vma_destroy(vma);
+	i915_active_fini(&vma->active);
+	i915_vma_free(vma);
 }
 
 void i915_vma_parked(struct drm_i915_private *i915)
@@ -828,12 +1015,32 @@ void i915_vma_parked(struct drm_i915_private *i915)
 
 	spin_lock_irq(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		list_del_init(&vma->closed_link);
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_address_space *vm = vma->vm;
+
+		/* XXX All to avoid keeping a reference on i915_vma itself */
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		if (!i915_vm_tryopen(vm)) {
+			i915_gem_object_put(obj);
+			obj = NULL;
+		}
+
 		spin_unlock_irq(&i915->gt.closed_lock);
 
-		i915_vma_destroy(vma);
+		if (obj) {
+			i915_vma_destroy(vma);
+			i915_gem_object_put(obj);
+		}
 
+		i915_vm_close(vm);
+
+		/* Restart after dropping lock */
 		spin_lock_irq(&i915->gt.closed_lock);
+		next = list_first_entry(&i915->gt.closed_vma,
+					typeof(*next), closed_link);
 	}
 	spin_unlock_irq(&i915->gt.closed_lock);
 }
@@ -873,6 +1080,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 		list_del(&vma->obj->userfault_link);
 }
 
+int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
+{
+	int err;
+
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+	/* Wait for the vma to be bound before we start! */
+	err = i915_request_await_active(rq, &vma->active);
+	if (err)
+		return err;
+
+	return i915_active_add_request(&vma->active, rq);
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
@@ -880,19 +1101,9 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	struct drm_i915_gem_object *obj = vma->obj;
 	int err;
 
-	assert_vma_held(vma);
 	assert_object_held(obj);
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	/*
-	 * Add a reference if we're newly entering the active list.
-	 * The order in which we add operations to the retirement queue is
-	 * vital here: mark_active adds to the start of the callback list,
-	 * such that subsequent callbacks are called first. Therefore we
-	 * add the active reference first and queue for it to be dropped
-	 * *last*.
-	 */
-	err = i915_active_add_request(&vma->active, rq);
+	err = __i915_vma_move_to_active(vma, rq);
 	if (unlikely(err))
 		return err;
 
@@ -918,38 +1129,23 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	return 0;
 }
 
-int i915_vma_unbind(struct i915_vma *vma)
+int __i915_vma_unbind(struct i915_vma *vma)
 {
 	int ret;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	/*
 	 * First wait upon any activity as retiring the request may
 	 * have side-effects such as unpinning or even unbinding this vma.
+	 *
+	 * XXX Actually waiting under the vm->mutex is a hindrance and
+	 * should be pipelined wherever possible. In cases where that is
+	 * unavoidable, we should lift the wait to before the mutex.
 	 */
-	might_sleep();
-	if (i915_vma_is_active(vma)) {
-		/*
-		 * When a closed VMA is retired, it is unbound - eek.
-		 * In order to prevent it from being recursively closed,
-		 * take a pin on the vma so that the second unbind is
-		 * aborted.
-		 *
-		 * Even more scary is that the retire callback may free
-		 * the object (last active vma). To prevent the explosion
-		 * we defer the actual object free to a worker that can
-		 * only proceed once it acquires the struct_mutex (which
-		 * we currently hold, therefore it cannot free this object
-		 * before we are finished).
-		 */
-		__i915_vma_pin(vma);
-		ret = i915_active_wait(&vma->active);
-		__i915_vma_unpin(vma);
-		if (ret)
-			return ret;
-	}
-	GEM_BUG_ON(i915_vma_is_active(vma));
+	ret = i915_vma_sync(vma);
+	if (ret)
+		return ret;
 
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
@@ -970,16 +1166,12 @@ int i915_vma_unbind(struct i915_vma *vma)
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
 		/* release the fence reg _after_ flushing */
-		mutex_lock(&vma->vm->mutex);
 		ret = i915_vma_revoke_fence(vma);
-		mutex_unlock(&vma->vm->mutex);
 		if (ret)
 			return ret;
 
 		/* Force a pagefault for domain tracking on next user access */
-		mutex_lock(&vma->vm->mutex);
 		i915_vma_revoke_mmap(vma);
-		mutex_unlock(&vma->vm->mutex);
 
 		__i915_vma_iounmap(vma);
 		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
@@ -987,17 +1179,33 @@ int i915_vma_unbind(struct i915_vma *vma)
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
 
-	if (likely(!vma->vm->closed)) {
+	if (likely(atomic_read(&vma->vm->open))) {
 		trace_i915_vma_unbind(vma);
 		vma->ops->unbind_vma(vma);
 	}
-	atomic_and(~I915_VMA_BIND_MASK, &vma->flags);
+	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags);
 
+	vma_unbind_pages(vma);
 	i915_vma_remove(vma);
 
 	return 0;
 }
 
+int i915_vma_unbind(struct i915_vma *vma)
+{
+	struct i915_address_space *vm = vma->vm;
+	int err;
+
+	err = mutex_lock_interruptible(&vm->mutex);
+	if (err)
+		return err;
+
+	err = __i915_vma_unbind(vma);
+	mutex_unlock(&vm->mutex);
+
+	return err;
+}
+
 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
 {
 	i915_gem_object_make_unshrinkable(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index e49b199f7de7..858908e3d1cc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -96,25 +96,28 @@ struct i915_vma {
 	 * exclusive cachelines of a single page, so a maximum of 64 possible
 	 * users.
 	 */
-#define I915_VMA_PIN_MASK 0xff
-#define I915_VMA_PIN_OVERFLOW_BIT 8
-#define I915_VMA_PIN_OVERFLOW	((int)BIT(I915_VMA_PIN_OVERFLOW_BIT))
+#define I915_VMA_PIN_MASK 0x3ff
+#define I915_VMA_OVERFLOW 0x200
 
 	/** Flags and address space this VMA is bound to */
-#define I915_VMA_GLOBAL_BIND_BIT 9
-#define I915_VMA_LOCAL_BIND_BIT 10
+#define I915_VMA_GLOBAL_BIND_BIT 10
+#define I915_VMA_LOCAL_BIND_BIT  11
 
 #define I915_VMA_GLOBAL_BIND	((int)BIT(I915_VMA_GLOBAL_BIND_BIT))
 #define I915_VMA_LOCAL_BIND	((int)BIT(I915_VMA_LOCAL_BIND_BIT))
 
-#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \
-			    I915_VMA_LOCAL_BIND | \
-			    I915_VMA_PIN_OVERFLOW)
+#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
 
-#define I915_VMA_GGTT_BIT	11
-#define I915_VMA_CAN_FENCE_BIT	12
-#define I915_VMA_USERFAULT_BIT	13
-#define I915_VMA_GGTT_WRITE_BIT	14
+#define I915_VMA_ALLOC_BIT	12
+#define I915_VMA_ALLOC		((int)BIT(I915_VMA_ALLOC_BIT))
+
+#define I915_VMA_ERROR_BIT	13
+#define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
+
+#define I915_VMA_GGTT_BIT	14
+#define I915_VMA_CAN_FENCE_BIT	15
+#define I915_VMA_USERFAULT_BIT	16
+#define I915_VMA_GGTT_WRITE_BIT	17
 
 #define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
 #define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
@@ -123,6 +126,11 @@ struct i915_vma {
 
 	struct i915_active active;
 
+#define I915_VMA_PAGES_BIAS 24
+#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
+	atomic_t pages_count; /* number of active binds to the pages */
+	struct mutex pages_mutex; /* protect acquire/release of backing pages */
+
 	/**
 	 * Support different GGTT views into the same object.
 	 * This means there can be multiple VMA mappings per object and per VM.
@@ -169,6 +177,8 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
 	return !i915_active_is_idle(&vma->active);
 }
 
+int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
+					   struct i915_request *rq);
 int __must_check i915_vma_move_to_active(struct i915_vma *vma,
 					 struct i915_request *rq,
 					 unsigned int flags);
@@ -307,13 +317,18 @@ i915_vma_compare(struct i915_vma *vma,
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
-int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
-		  u32 flags);
+struct i915_vma_work *i915_vma_work(void);
+int i915_vma_bind(struct i915_vma *vma,
+		  enum i915_cache_level cache_level,
+		  u32 flags,
+		  struct i915_vma_work *work);
+
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
+int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
@@ -332,26 +347,8 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
 	dma_resv_unlock(vma->resv);
 }
 
-int __i915_vma_do_pin(struct i915_vma *vma,
-		      u64 size, u64 alignment, u64 flags);
-static inline int __must_check
-i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
-{
-	BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
-	BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
-	BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
-
-	/* Pin early to prevent the shrinker/eviction logic from destroying
-	 * our vma as we insert and bind.
-	 */
-	if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) {
-		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-		return 0;
-	}
-
-	return __i915_vma_do_pin(vma, size, alignment, flags);
-}
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
@@ -366,17 +363,17 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
 	atomic_inc(&vma->flags);
-	GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW);
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
+	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	atomic_dec(&vma->flags);
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
 {
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	__i915_vma_unpin(vma);
 }
@@ -402,8 +399,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node,
  * the caller must call i915_vma_unpin_iomap to relinquish the pinning
  * after the iomapping is no longer required.
  *
- * Callers must hold the struct_mutex.
- *
  * Returns a valid iomapped pointer or ERR_PTR.
  */
 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
@@ -415,8 +410,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
  *
  * Unpins the previously iomapped VMA from i915_vma_pin_iomap().
  *
- * Callers must hold the struct_mutex. This function is only valid to be
- * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap().
+ * This function is only valid to be called on a VMA previously
+ * iomapped by the caller with i915_vma_pin_iomap().
  */
 void i915_vma_unpin_iomap(struct i915_vma *vma);
 
@@ -444,6 +439,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
 int __must_check i915_vma_pin_fence(struct i915_vma *vma);
 int __must_check i915_vma_revoke_fence(struct i915_vma *vma);
 
+int __i915_vma_pin_fence(struct i915_vma *vma);
+
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0);
@@ -461,7 +458,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
-	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
 	if (vma->fence)
 		__i915_vma_unpin_fence(vma);
 }
@@ -490,4 +486,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
+static inline int i915_vma_sync(struct i915_vma *vma)
+{
+	/* Wait for the asynchronous bindings and pending GPU reads */
+	return i915_active_wait(&vma->active);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 37593831b539..0346c3e5b6b6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -119,10 +119,8 @@ static void pm_resume(struct drm_i915_private *i915)
 		intel_gt_sanitize(&i915->gt, false);
 		i915_gem_sanitize(i915);
 
-		mutex_lock(&i915->drm.struct_mutex);
 		i915_gem_restore_gtt_mappings(i915);
 		i915_gem_restore_fences(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		i915_gem_resume(i915);
 	}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 2905fb21d866..75a4695b82bb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915,
 
 static void unpin_ggtt(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
 
-	mutex_lock(&ggtt->vm.mutex);
 	list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
 		if (vma->obj->mm.quirked)
 			i915_vma_unpin(vma);
-	mutex_unlock(&ggtt->vm.mutex);
 }
 
 static void cleanup_objects(struct drm_i915_private *i915,
@@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915,
 		i915_gem_object_put(obj);
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static int igt_evict_something(void *arg)
@@ -148,10 +141,12 @@ static int igt_evict_something(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -161,10 +156,12 @@ static int igt_evict_something(void *arg)
 	unpin_ggtt(i915);
 
 	/* Everything is unpinned, we should be able to evict something */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_something(&ggtt->vm,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
 		       err);
@@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
 		       err);
@@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg)
 	unpin_ggtt(i915);
 
 	/* Everything is unpinned, we should be able to evict the node */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_for_node returned err=%d\n",
 		       err);
@@ -319,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg)
 	i915_vma_unpin(vma);
 
 	/* Remove just the second vma */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
 		goto cleanup;
@@ -330,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg)
 	 */
 	target.color = I915_CACHE_L3_LLC;
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (!err) {
 		pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
 		err = -EINVAL;
@@ -360,7 +365,9 @@ static int igt_evict_vm(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -369,7 +376,9 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(i915);
 
+	mutex_lock(&ggtt->vm.mutex);
 	err = i915_gem_evict_vm(&ggtt->vm);
+	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -410,11 +419,11 @@ static int igt_evict_contexts(void *arg)
 	if (!HAS_FULL_PPGTT(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
+	mutex_lock(&i915->ggtt.vm.mutex);
 	err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
 				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				  0, i915->ggtt.vm.total,
@@ -427,7 +436,9 @@ static int igt_evict_contexts(void *arg)
 	do {
 		struct reserved *r;
 
+		mutex_unlock(&i915->ggtt.vm.mutex);
 		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
+		mutex_lock(&i915->ggtt.vm.mutex);
 		if (!r) {
 			err = -ENOMEM;
 			goto out_locked;
@@ -447,7 +458,7 @@ static int igt_evict_contexts(void *arg)
 		count++;
 	} while (1);
 	drm_mm_remove_node(&hole);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);
 
 	/* Overfill the GGTT with context objects and so try to evict one. */
@@ -510,7 +521,7 @@ static int igt_evict_contexts(void *arg)
 			break;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
@@ -524,8 +535,8 @@ static int igt_evict_contexts(void *arg)
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -547,12 +558,9 @@ int i915_gem_evict_mock_selftests(void)
 	if (!i915)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = i915_subtests(tests, i915);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	drm_dev_put(&i915->drm);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 84d94eabc7dd..373fbd58a17a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -38,16 +38,7 @@
 
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
-	/*
-	 * As we may hold onto the struct_mutex for inordinate lengths of
-	 * time, the NMI khungtaskd detector may fire for the free objects
-	 * worker.
-	 */
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
 }
 
 static void fake_free_pages(struct drm_i915_gem_object *obj,
@@ -880,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915,
 		i915_vma_unpin(vma);
 		addr += size;
 
+		/*
+		 * Since we are injecting allocation faults at random intervals,
+		 * wait for this allocation to complete before we change the
+		 * fault injection.
+		 */
+		err = i915_vma_sync(vma);
+		if (err)
+			break;
+
 		if (igt_timeout(end_time,
 				"%s timed out at offset %llx [%llx - %llx]\n",
 				__func__, addr, hole_start, hole_end)) {
@@ -1013,21 +1013,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	ppgtt = i915_ppgtt_create(dev_priv);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
-		goto out_unlock;
+		goto out_free;
 	}
 	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
-	GEM_BUG_ON(ppgtt->vm.closed);
+	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
 	i915_vm_put(&ppgtt->vm);
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+out_free:
 	mock_file_free(dev_priv, file);
 	return err;
 }
@@ -1090,7 +1088,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 	IGT_TIMEOUT(end_time);
 	int err = 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
 restart:
 	list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes);
 	drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) {
@@ -1111,7 +1108,6 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 		last = hole_end;
 		goto restart;
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -1153,13 +1149,9 @@ static int igt_ggtt_page(void *arg)
 	unsigned int *order, n;
 	int err;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
@@ -1227,8 +1219,6 @@ static int igt_ggtt_page(void *arg)
 	i915_gem_object_unpin_pages(obj);
 out_free:
 	i915_gem_object_put(obj);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1239,6 +1229,9 @@ static void track_vma_bind(struct i915_vma *vma)
 	atomic_inc(&obj->bind_count); /* track for eviction later */
 	__i915_gem_object_pin_pages(obj);
 
+	GEM_BUG_ON(vma->pages);
+	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
+	__i915_gem_object_pin_pages(obj);
 	vma->pages = obj->mm.pages;
 
 	mutex_lock(&vma->vm->mutex);
@@ -1335,11 +1328,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1385,11 +1380,13 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   total,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1429,11 +1426,13 @@ static int igt_gtt_reserve(void *arg)
 				       2*I915_GTT_PAGE_SIZE,
 				       I915_GTT_MIN_ALIGNMENT);
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
 					   obj->base.size,
 					   offset,
 					   obj->cache_level,
 					   0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1502,11 +1501,13 @@ static int igt_gtt_insert(void *arg)
 
 	/* Check a couple of obviously invalid requests */
 	for (ii = invalid_insert; ii->size; ii++) {
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
 					  ii->size, ii->alignment,
 					  I915_COLOR_UNEVICTABLE,
 					  ii->start, ii->end,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err != -ENOSPC) {
 			pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n",
 			       ii->size, ii->alignment, ii->start, ii->end,
@@ -1542,10 +1543,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err == -ENOSPC) {
 			/* maxed out the GGTT space */
 			i915_gem_object_put(obj);
@@ -1600,10 +1603,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1647,10 +1652,12 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
+		mutex_lock(&ggtt->vm.mutex);
 		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
 					  obj->base.size, 0, obj->cache_level,
 					  0, ggtt->vm.total,
 					  0);
+		mutex_unlock(&ggtt->vm.mutex);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1694,8 +1701,9 @@ int i915_gem_gtt_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
+
+	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 57cd4180d06c..eb175da48547 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -647,8 +647,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
+	/* Force the wait now to avoid including it in the benchmark */
+	err = i915_vma_sync(vma);
+	if (err)
+		goto err_pin;
+
 	return vma;
 
+err_pin:
+	i915_vma_unpin(vma);
 err:
 	i915_gem_object_put(obj);
 	return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 97752deecccb..0e4f66312b39 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -831,8 +831,9 @@ int i915_vma_mock_selftests(void)
 	}
 	mock_init_ggtt(i915, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_subtests(tests, ggtt);
+
+	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -879,8 +880,6 @@ static int igt_vma_remapped_gtt(void *arg)
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	for (t = types; *t; t++) {
@@ -976,7 +975,6 @@ static int igt_vma_remapped_gtt(void *arg)
 
 out:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_put(obj);
 
 	return err;
-- 
2.23.0

* [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (4 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
                   ` (28 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

As we need to use a mutex to serialise i915_active activation
(because we want to allow the callback to sleep), we need to push the
i915_active.retire into a worker callback in case we need to retire
from an atomic context.
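
In outline, this is the usual "defer the final release to process
context" trick. A minimal sketch of the idea, using hypothetical names
rather than the i915 API:

#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct tracker {
	atomic_t count;
	struct mutex mutex;		/* serialises retirement */
	struct work_struct work;	/* INIT_WORK(&work, tracker_retire_work) */
};

static void tracker_retire_work(struct work_struct *wrk)
{
	struct tracker *t = container_of(wrk, typeof(*t), work);

	/* Process context: taking the sleeping mutex is allowed here */
	mutex_lock(&t->mutex);
	if (atomic_dec_and_test(&t->count)) {
		/* ... run the retire callback, which may sleep ... */
	}
	mutex_unlock(&t->mutex);
}

static void tracker_put(struct tracker *t)
{
	/* Fast path: not the final reference, nothing to retire */
	if (atomic_add_unless(&t->count, -1, 1))
		return;

	/*
	 * The final reference may be dropped from irq context (fence
	 * signaling), where we must not sleep, so punt the retirement
	 * to a worker instead.
	 */
	queue_work(system_unbound_wq, &t->work);
}

The fast path stays atomic; only the last reference, which may be
dropped from the signaling interrupt, pays for the worker.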

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  |  4 ++-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |  1 +
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
 drivers/gpu/drm/i915/i915_active.c            | 34 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_active_types.h      | 13 ++++++-
 drivers/gpu/drm/i915/i915_vma.c               |  2 ++
 drivers/gpu/drm/i915/selftests/i915_active.c  |  6 ++--
 9 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index fc40dc1fdbcc..6428b8dd70d3 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void frontbuffer_retire(struct i915_active *ref)
 {
 	struct intel_frontbuffer *front =
@@ -257,7 +258,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
 	i915_active_init(i915, &front->write,
-			 frontbuffer_active, frontbuffer_retire);
+			 frontbuffer_active,
+			 i915_active_may_sleep(frontbuffer_retire));
 
 	spin_lock(&i915->fb_tracking.lock);
 	if (obj->frontbuffer) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 95f8e66e45db..4cd7d2ecf1d5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -848,6 +848,7 @@ struct context_barrier_task {
 	void *data;
 };
 
+__i915_active_call
 static void cb_retire(struct i915_active *base)
 {
 	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 26cb838c272c..06fabdf205cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -138,6 +138,7 @@ static void __context_unpin_state(struct i915_vma *vma)
 	__i915_vma_unpin(vma);
 }
 
+__i915_active_call
 static void __intel_context_retire(struct i915_active *active)
 {
 	struct intel_context *ce = container_of(active, typeof(*ce), active);
@@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active)
 
 	intel_timeline_unpin(ce->timeline);
 	intel_ring_unpin(ce->ring);
+
 	intel_context_put(ce);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 97d36cca8ded..81fab101fdb4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref)
 	return 0;
 }
 
+__i915_active_call
 static void pool_retire(struct i915_active *ref)
 {
 	struct intel_engine_pool_node *node =
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 9d436e14ea8d..653f60e78392 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 	kfree(cl);
 }
 
+__i915_active_call
 static void __cacheline_retire(struct i915_active *active)
 {
 	struct intel_timeline_cacheline *cl =
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 0791736a08fd..7ca066688b98 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -132,6 +132,7 @@ __active_retire(struct i915_active *ref)
 	bool retire = false;
 
 	lockdep_assert_held(&ref->mutex);
+	GEM_BUG_ON(i915_active_is_idle(ref));
 
 	/* return the unused nodes to our slabcache -- flushing the allocator */
 	if (atomic_dec_and_test(&ref->count)) {
@@ -157,6 +158,19 @@ __active_retire(struct i915_active *ref)
 		ref->retire(ref);
 }
 
+static void
+active_work(struct work_struct *wrk)
+{
+	struct i915_active *ref = container_of(wrk, typeof(*ref), work);
+
+	GEM_BUG_ON(!atomic_read(&ref->count));
+	if (atomic_add_unless(&ref->count, -1, 1))
+		return;
+
+	mutex_lock(&ref->mutex);
+	__active_retire(ref);
+}
+
 static void
 active_retire(struct i915_active *ref)
 {
@@ -164,8 +178,13 @@ active_retire(struct i915_active *ref)
 	if (atomic_add_unless(&ref->count, -1, 1))
 		return;
 
-	/* One active may be flushed from inside the acquire of another */
-	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+	/* If we are inside interrupt context (fence signaling), defer */
+	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
+	    !mutex_trylock(&ref->mutex)) {
+		queue_work(system_unbound_wq, &ref->work);
+		return;
+	}
+
 	__active_retire(ref);
 }
 
@@ -240,12 +259,16 @@ void __i915_active_init(struct drm_i915_private *i915,
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
 {
+	unsigned long bits;
+
 	debug_active_init(ref);
 
 	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
-	ref->retire = retire;
+	ref->retire = ptr_unpack_bits(retire, &bits, 2);
+	if (bits & I915_ACTIVE_MAY_SLEEP)
+		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
 
 	ref->excl = NULL;
 	ref->tree = RB_ROOT;
@@ -253,6 +276,7 @@ void __i915_active_init(struct drm_i915_private *i915,
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
+	INIT_WORK(&ref->work, active_work);
 }
 
 static bool ____active_del_barrier(struct i915_active *ref,
@@ -504,6 +528,7 @@ int i915_active_wait(struct i915_active *ref)
 	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
 		return -EINTR;
 
+	flush_work(&ref->work);
 	if (!i915_active_is_idle(ref))
 		return -EBUSY;
 
@@ -544,8 +569,9 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 void i915_active_fini(struct i915_active *ref)
 {
 	debug_active_fini(ref);
-	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	GEM_BUG_ON(atomic_read(&ref->count));
+	GEM_BUG_ON(work_pending(&ref->work));
+	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
 }
 #endif
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 86e7a232ea3c..021167f0004d 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -13,6 +13,9 @@
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+#include "i915_utils.h"
 
 struct drm_i915_private;
 struct i915_active_request;
@@ -44,6 +47,11 @@ struct i915_active_request {
 
 struct active_node;
 
+#define I915_ACTIVE_MAY_SLEEP BIT(0)
+
+#define __i915_active_call __aligned(4)
+#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
+
 struct i915_active {
 	struct drm_i915_private *i915;
 
@@ -57,11 +65,14 @@ struct i915_active {
 	struct dma_fence_cb excl_cb;
 
 	unsigned long flags;
-#define I915_ACTIVE_GRAB_BIT 0
+#define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
+#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
 
+	struct work_struct work;
+
 	struct llist_head preallocated_barriers;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fe91a0e47b88..e191247c7c5f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -91,6 +91,7 @@ static int __i915_vma_active(struct i915_active *ref)
 	return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
 }
 
+__i915_active_call
 static void __i915_vma_retire(struct i915_active *ref)
 {
 	i915_vma_put(active_to_vma(ref));
@@ -1152,6 +1153,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
 		return -EBUSY;
 	}
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index afecfa081ff4..a41785822ed9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -121,7 +121,7 @@ __live_active_setup(struct drm_i915_private *i915)
 	}
 
 	i915_active_release(&active->base);
-	if (active->retired && count) {
+	if (READ_ONCE(active->retired) && count) {
 		pr_err("i915_active retired before submission!\n");
 		err = -EINVAL;
 	}
@@ -161,7 +161,7 @@ static int live_active_wait(void *arg)
 	}
 
 	i915_active_wait(&active->base);
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after waiting!\n");
 		err = -EINVAL;
 	}
@@ -200,7 +200,7 @@ static int live_active_retire(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	if (!active->retired) {
+	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
 		err = -EINVAL;
 	}
-- 
2.23.0

* [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (5 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:49   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
                   ` (27 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Forgo the struct_mutex serialisation for i915_active, and interpose its
own mutex handling for active/retire.

This is a multi-layered sleight-of-hand. First, we had to ensure that no
active/retire callbacks accidentally inverted the mutex ordering rules,
nor assumed that they were themselves serialised by struct_mutex. More
challenging, though, is the rule for updating elements of the active
rbtree. Instead of the whole i915_active being serialised by
struct_mutex, allocations/rotations of the tree are serialised by the
i915_active.mutex and individual nodes are serialised by the caller
using the i915_timeline.mutex (we need to use nested spinlocks to
interact with the dma_fence callback lists).
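
As a sketch of the resulting hierarchy (hypothetical names and helper,
not the driver code itself): topology changes to the tree happen only
under the tracker's own mutex, while each node is updated by its caller
under the per-timeline lock, with the fence callback lists beneath both.

#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/types.h>

/*
 * Lock ordering, outermost first:
 *   tracker->mutex    - rbtree allocation/rotation (i915_active.mutex)
 *    timeline->mutex  - updates to an individual node
 *     fence->lock     - dma_fence callback lists (nested spinlocks)
 */
struct tracker {
	struct mutex mutex;	/* guards allocation/rotation of the tree */
	struct rb_root tree;
};

/* Hypothetical helper standing in for the rbtree walk */
struct tracker_node *lookup_or_insert(struct rb_root *tree, u64 idx);

static struct tracker_node *tracker_node(struct tracker *t, u64 idx)
{
	struct tracker_node *node;

	mutex_lock(&t->mutex);		/* tree topology changes only here */
	node = lookup_or_insert(&t->tree, idx);
	mutex_unlock(&t->mutex);

	/* The node itself is then updated under the caller's timeline->mutex */
	return node;
}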

The pain point here is that instead of a single mutex around execbuf, we
now have to take a mutex for each active tracker (one per vma, context,
etc.) and a couple of spinlocks for each fence update. The improvement in
fine-grained locking, allowing multiple concurrent clients
(eventually!), should be worth it in typical loads.

v2: Add some comments that barely elucidate anything :(

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/display/intel_frontbuffer.c  |   2 +-
 drivers/gpu/drm/i915/display/intel_overlay.c  |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   9 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c         |  10 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |   7 +-
 .../gpu/drm/i915/gt/intel_timeline_types.h    |   9 +-
 drivers/gpu/drm/i915/gt/selftest_context.c    |  16 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  10 +-
 .../gpu/drm/i915/gt/selftests/mock_timeline.c |   2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   3 -
 drivers/gpu/drm/i915/i915_active.c            | 322 +++++++++---------
 drivers/gpu/drm/i915/i915_active.h            | 319 ++++-------------
 drivers/gpu/drm/i915/i915_active_types.h      |  23 +-
 drivers/gpu/drm/i915/i915_gem.c               |  42 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   4 +-
 drivers/gpu/drm/i915/i915_request.c           |  38 +--
 drivers/gpu/drm/i915/i915_request.h           |   1 -
 drivers/gpu/drm/i915/i915_vma.c               |   4 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |  32 +-
 24 files changed, 298 insertions(+), 572 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 6428b8dd70d3..84b164f31895 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
 	front->obj = obj;
 	kref_init(&front->ref);
 	atomic_set(&front->bits, 0);
-	i915_active_init(i915, &front->write,
+	i915_active_init(&front->write,
 			 frontbuffer_active,
 			 i915_active_may_sleep(frontbuffer_retire));
 
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 3f4ac1ee7668..e12e1a753af0 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	i915_active_init(dev_priv,
-			 &overlay->last_flip,
+	i915_active_init(&overlay->last_flip,
 			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4cd7d2ecf1d5..9d85aab68d34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 				void (*task)(void *data),
 				void *data)
 {
-	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
 	int err = 0;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!task);
 
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb)
 		return -ENOMEM;
 
-	i915_active_init(i915, &cb->base, NULL, cb_retire);
+	i915_active_init(&cb->base, NULL, cb_retire);
 	err = i915_active_acquire(&cb->base);
 	if (err) {
 		kfree(cb);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e1aab2fd1cd9..c00b4f077f9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -8,6 +8,7 @@
 #define __I915_GEM_OBJECT_TYPES_H__
 
 #include <drm/drm_gem.h>
+#include <uapi/drm/i915_drm.h>
 
 #include "i915_active.h"
 #include "i915_selftest.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index cca192ecff8b..0a4115c6c275 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
 	struct llist_node *node, *next;
 
 	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct i915_active_request *active =
+		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
-				     typeof(*active), link);
+				     typeof(*cb), node);
 
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, NULL);
+		cb->func(NULL, cb);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 06fabdf205cf..35a40c2820a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,
 
 	mutex_init(&ce->pin_mutex);
 
-	i915_active_init(ctx->i915, &ce->active,
+	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
 }
 
@@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 			return err;
 
 		/* Queue this switch after current activity by this context. */
-		err = i915_active_request_set(&tl->last_request, rq);
+		err = i915_active_fence_set(&tl->last_request, rq);
 		mutex_unlock(&tl->mutex);
 		if (err)
 			return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 81fab101fdb4..3cdbd5f8b5be 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
 		return ERR_PTR(-ENOMEM);
 
 	node->pool = pool;
-	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+	i915_active_init(&node->active, pool_active, pool_retire);
 
 	obj = i915_gem_object_create_internal(engine->i915, sz);
 	if (IS_ERR(obj)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index e189897e8797..055496f0825f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 */
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
@@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
 		 * in the worst case.
 		 */
-		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
-		i915_request_put(rq);
+		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+		dma_fence_put(fence);
 
 		/* Restart iteration after dropping lock */
 		spin_lock_irqsave(&timelines->lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 653f60e78392..0f959694303c 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
-	i915_active_init(hwsp->gt->i915, &cl->active,
-			 __cacheline_active, __cacheline_retire);
+	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
 
 	return cl;
 }
@@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
 	if (err)
 		goto err_cacheline;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index c668c4c50e75..98d9ee166379 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -58,12 +58,13 @@ struct intel_timeline {
 	 */
 	struct list_head requests;
 
-	/* Contains an RCU guarded pointer to the last request. No reference is
+	/*
+	 * Contains an RCU guarded pointer to the last request. No reference is
 	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_active_request_get_request_rcu(), or hold the
-	 * struct_mutex.
+	 * the request using i915_active_fence_get(), or manage the RCU
+	 * protection themselves (cf the i915_active_fence API).
 	 */
-	struct i915_active_request last_request;
+	struct i915_active_fence last_request;
 
 	/**
 	 * We track the most recent seqno that we wait on in every context so
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 4ce1e25433d2..e6bcbe7ab5e1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce)
 
 	mutex_lock(&tl->mutex);
 	do {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 		long timeout;
 
-		rcu_read_lock();
-		rq = rcu_dereference(tl->last_request.request);
-		if (rq)
-			rq = i915_request_get_rcu(rq);
-		rcu_read_unlock();
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			break;
 
-		timeout = i915_request_wait(rq, 0, HZ / 10);
+		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
 		if (timeout < 0)
 			err = timeout;
 		else
-			i915_request_retire_upto(rq);
+			i915_request_retire_upto(to_request(fence));
 
-		i915_request_put(rq);
+		dma_fence_put(fence);
 	} while (!err);
 	mutex_unlock(&tl->mutex);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 2da4fa5149cb..01d5a26b9aa7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1176,9 +1176,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	if (!rq)
 		return NULL;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	rq->engine = engine;
 
+	spin_lock_init(&rq->lock);
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+	rq->fence.lock = &rq->lock;
+	rq->fence.ops = &i915_fence_ops;
+
 	i915_sched_node_init(&rq->sched);
 
 	/* mark this request as permanently incomplete */
@@ -1271,8 +1275,8 @@ static int live_suppress_wait_preempt(void *arg)
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
-							  dummy);
+				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+							&dummy->fence);
 				i915_request_add(rq[i]);
 			}
 
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..2a77c051f36a 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 6c79d16b381e..03f567084548 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct i915_request *rq;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (workload->req)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7ca066688b98..023652ded4be 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -12,8 +12,6 @@
 #include "i915_active.h"
 #include "i915_globals.h"
 
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
-
 /*
  * Active refs memory management
  *
@@ -27,35 +25,35 @@ static struct i915_global_active {
 } global;
 
 struct active_node {
-	struct i915_active_request base;
+	struct i915_active_fence base;
 	struct i915_active *ref;
 	struct rb_node node;
 	u64 timeline;
 };
 
 static inline struct active_node *
-node_from_active(struct i915_active_request *active)
+node_from_active(struct i915_active_fence *active)
 {
 	return container_of(active, struct active_node, base);
 }
 
 #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
 
-static inline bool is_barrier(const struct i915_active_request *active)
+static inline bool is_barrier(const struct i915_active_fence *active)
 {
-	return IS_ERR(rcu_access_pointer(active->request));
+	return IS_ERR(rcu_access_pointer(active->fence));
 }
 
 static inline struct llist_node *barrier_to_ll(struct active_node *node)
 {
 	GEM_BUG_ON(!is_barrier(&node->base));
-	return (struct llist_node *)&node->base.link;
+	return (struct llist_node *)&node->base.cb.node;
 }
 
 static inline struct intel_engine_cs *
 __barrier_to_engine(struct active_node *node)
 {
-	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
+	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
 }
 
 static inline struct intel_engine_cs *
@@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node)
 static inline struct active_node *barrier_from_ll(struct llist_node *x)
 {
 	return container_of((struct list_head *)x,
-			    struct active_node, base.link);
+			    struct active_node, base.cb.node);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
@@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref)
 	if (!retire)
 		return;
 
-	GEM_BUG_ON(rcu_access_pointer(ref->excl));
+	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
-		GEM_BUG_ON(i915_active_request_isset(&it->base));
+		GEM_BUG_ON(i915_active_fence_isset(&it->base));
 		kmem_cache_free(global.slab_cache, it);
 	}
 
 	/* After the final retire, the entire struct may be freed */
 	if (ref->retire)
 		ref->retire(ref);
+
+	/* ... except if you wait on it, you must manage your own references! */
+	wake_up_var(ref);
 }
 
 static void
@@ -189,12 +190,20 @@ active_retire(struct i915_active *ref)
 }
 
 static void
-node_retire(struct i915_active_request *base, struct i915_request *rq)
+node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	active_retire(node_from_active(base)->ref);
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static struct i915_active_request *
+static void
+excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	i915_active_fence_cb(fence, cb);
+	active_retire(container_of(cb, struct i915_active, excl.cb));
+}
+
+static struct i915_active_fence *
 active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
@@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
+	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -253,8 +262,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
 	return &node->base;
 }
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key)
@@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915,
 
 	debug_active_init(ref);
 
-	ref->i915 = i915;
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = ptr_unpack_bits(retire, &bits, 2);
 	if (bits & I915_ACTIVE_MAY_SLEEP)
 		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
 
-	ref->excl = NULL;
 	ref->tree = RB_ROOT;
 	ref->cache = NULL;
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", key);
+	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
 	INIT_WORK(&ref->work, active_work);
 }
 
@@ -329,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq)
+		    struct dma_fence *fence)
 {
-	struct i915_active_request *active;
+	struct i915_active_fence *active;
 	int err;
 
 	lockdep_assert_held(&tl->mutex);
@@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref,
 		 * request that we want to emit on the kernel_context.
 		 */
 		__active_del_barrier(ref, node_from_active(active));
-		RCU_INIT_POINTER(active->request, NULL);
-		INIT_LIST_HEAD(&active->link);
-	} else {
-		if (!i915_active_request_isset(active))
-			atomic_inc(&ref->count);
+		RCU_INIT_POINTER(active->fence, NULL);
+		atomic_dec(&ref->count);
 	}
-	GEM_BUG_ON(!atomic_read(&ref->count));
-	__i915_active_request_set(active, rq);
+	if (!__i915_active_fence_set(active, fence))
+		atomic_inc(&ref->count);
 
 out:
 	i915_active_release(ref);
 	return err;
 }
 
-static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
-{
-	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
-
-	RCU_INIT_POINTER(ref->excl, NULL);
-	dma_fence_put(f);
-
-	active_retire(ref);
-}
-
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
 {
 	/* We expect the caller to manage the exclusive timeline ordering */
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
-	dma_fence_get(f);
-
-	rcu_read_lock();
-	if (rcu_access_pointer(ref->excl)) {
-		struct dma_fence *old;
-
-		old = dma_fence_get_rcu_safe(&ref->excl);
-		if (old) {
-			if (dma_fence_remove_callback(old, &ref->excl_cb))
-				atomic_dec(&ref->count);
-			dma_fence_put(old);
-		}
-	}
-	rcu_read_unlock();
-
-	atomic_inc(&ref->count);
-	rcu_assign_pointer(ref->excl, f);
+	/*
+	 * As we don't know which mutex the caller is using, we told a small
+	 * lie to the debug code that it is using the i915_active.mutex;
+	 * and now we must stick to that lie.
+	 */
+	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
+	if (!__i915_active_fence_set(&ref->excl, f))
+		atomic_inc(&ref->count);
+	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
+}
 
-	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
-		RCU_INIT_POINTER(ref->excl, NULL);
-		atomic_dec(&ref->count);
-		dma_fence_put(f);
-	}
+bool i915_active_acquire_if_busy(struct i915_active *ref)
+{
+	debug_active_assert(ref);
+	return atomic_add_unless(&ref->count, 1, 0);
 }
 
 int i915_active_acquire(struct i915_active *ref)
 {
 	int err;
 
-	debug_active_assert(ref);
-	if (atomic_add_unless(&ref->count, 1, 0))
+	if (i915_active_acquire_if_busy(ref))
 		return 0;
 
 	err = mutex_lock_interruptible(&ref->mutex);
@@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref)
 	active_retire(ref);
 }
 
-static void __active_ungrab(struct i915_active *ref)
-{
-	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
-}
-
-bool i915_active_trygrab(struct i915_active *ref)
+static void enable_signaling(struct i915_active_fence *active)
 {
-	debug_active_assert(ref);
-
-	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
-		return false;
-
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		__active_ungrab(ref);
-		return false;
-	}
+	struct dma_fence *fence;
 
-	return true;
-}
-
-void i915_active_ungrab(struct i915_active *ref)
-{
-	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
-
-	active_retire(ref);
-	__active_ungrab(ref);
-}
-
-static int excl_wait(struct i915_active *ref)
-{
-	struct dma_fence *old;
-	int err = 0;
-
-	if (!rcu_access_pointer(ref->excl))
-		return 0;
-
-	rcu_read_lock();
-	old = dma_fence_get_rcu_safe(&ref->excl);
-	rcu_read_unlock();
-	if (old) {
-		err = dma_fence_wait(old, true);
-		dma_fence_put(old);
-	}
+	fence = i915_active_fence_get(active);
+	if (!fence)
+		return;
 
-	return err;
+	dma_fence_enable_sw_signaling(fence);
+	dma_fence_put(fence);
 }
 
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
-	int err;
+	int err = 0;
 
 	might_sleep();
-	might_lock(&ref->mutex);
-
-	if (i915_active_is_idle(ref))
-		return 0;
-
-	err = mutex_lock_interruptible(&ref->mutex);
-	if (err)
-		return err;
 
-	if (!atomic_add_unless(&ref->count, 1, 0)) {
-		mutex_unlock(&ref->mutex);
+	if (!i915_active_acquire_if_busy(ref))
 		return 0;
-	}
-
-	err = excl_wait(ref);
-	if (err)
-		goto out;
 
+	/* Flush lazy signals */
+	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
-			err = -EBUSY;
-			break;
-		}
+		if (is_barrier(&it->base)) /* unconnected idle barrier */
+			continue;
 
-		err = i915_active_request_retire(&it->base, BKL(ref));
-		if (err)
-			break;
+		enable_signaling(&it->base);
 	}
+	/* Any fence added after the wait begins will not be auto-signaled */
 
-out:
-	__active_retire(ref);
+	i915_active_release(ref);
 	if (err)
 		return err;
 
-	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
+	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
 		return -EINTR;
 
-	flush_work(&ref->work);
-	if (!i915_active_is_idle(ref))
-		return -EBUSY;
-
 	return 0;
 }
 
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active)
-{
-	struct i915_request *barrier =
-		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
-
-	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
-}
-
 int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 {
 	int err = 0;
 
-	if (rcu_access_pointer(ref->excl)) {
+	if (rcu_access_pointer(ref->excl.fence)) {
 		struct dma_fence *fence;
 
 		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&ref->excl);
+		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
 		rcu_read_unlock();
 		if (fence) {
 			err = i915_request_await_dma_fence(rq, fence);
@@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref)
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
-	return node->timeline == idx && !i915_active_request_isset(&node->base);
+	return node->timeline == idx && !i915_active_fence_isset(&node->base);
 }
 
 static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
@@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			node->base.lock =
 				&engine->kernel_context->timeline->mutex;
 #endif
-			RCU_INIT_POINTER(node->base.request, NULL);
-			node->base.retire = node_retire;
+			RCU_INIT_POINTER(node->base.fence, NULL);
+			node->base.cb.func = node_retire;
 			node->timeline = idx;
 			node->ref = ref;
 		}
 
-		if (!i915_active_request_isset(&node->base)) {
+		if (!i915_active_fence_isset(&node->base)) {
 			/*
 			 * Mark this as being *our* unconnected proto-node.
 			 *
@@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 			 * and then we can use the rb_node and list pointers
 			 * for our tracking of the pending barrier.
 			 */
-			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
-			node->base.link.prev = (void *)engine;
+			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
+			node->base.cb.node.prev = (void *)engine;
 			atomic_inc(&ref->count);
 		}
 
@@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct llist_node *node, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	node = llist_del_all(&engine->barrier_tasks);
+	if (!node)
+		return;
 	/*
 	 * Attach the list of proto-fences to the in-flight request such
 	 * that the parent i915_active will be released when this request
 	 * is retired.
 	 */
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
+	spin_lock_irqsave(&rq->lock, flags);
+	llist_for_each_safe(node, next, node) {
+		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
 		smp_wmb(); /* serialise with reuse_idle_barrier */
-		list_add_tail((struct list_head *)node, &rq->active_list);
+		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#define active_is_held(active) lockdep_is_held((active)->lock)
+#else
+#define active_is_held(active) true
+#endif
+
+/*
+ * __i915_active_fence_set: Update the last active fence along its timeline
+ * @active: the active tracker
+ * @fence: the new fence (under construction)
+ *
+ * Records the new @fence as the last active fence along its timeline in
+ * this active tracker, moving the tracking callbacks from the previous
+ * fence onto this one. Returns the previous fence (if not already completed),
+ * which the caller must ensure is executed before the new fence. To ensure
+ * that the order of fences within the timeline of the i915_active_fence is
+ * maintained, it must be locked by the caller.
+ */
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence)
+{
+	struct dma_fence *prev;
+	unsigned long flags;
+
+	/* NB: must be serialised by an outer timeline mutex (active->lock) */
+	spin_lock_irqsave(fence->lock, flags);
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+
+	prev = rcu_dereference_protected(active->fence, active_is_held(active));
+	if (prev) {
+		GEM_BUG_ON(prev == fence);
+		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+		__list_del_entry(&active->cb.node);
+		spin_unlock(prev->lock); /* serialise with prev->cb_list */
+
+		/*
+		 * active->fence is reset by the callback from inside
+		 * interrupt context. We need to serialise our list
+		 * manipulation with the fence->lock to prevent the prev
+		 * being lost inside an interrupt (it can't be replaced as
+		 * no other caller is allowed to enter __i915_active_fence_set
+		 * as we hold the timeline lock). After serialising with
+		 * the callback, we need to double check which ran first,
+		 * our list_del() [decoupling prev from the callback] or
+		 * the callback...
+		 */
+		prev = rcu_access_pointer(active->fence);
 	}
+
+	rcu_assign_pointer(active->fence, fence);
+	list_add_tail(&active->cb.node, &fence->cb_list);
+
+	spin_unlock_irqrestore(fence->lock, flags);
+
+	return prev;
 }
 
-int i915_active_request_set(struct i915_active_request *active,
-			    struct i915_request *rq)
+int i915_active_fence_set(struct i915_active_fence *active,
+			  struct i915_request *rq)
 {
-	int err;
+	struct dma_fence *fence;
+	int err = 0;
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	lockdep_assert_held(active->lock);
 #endif
 
-	/* Must maintain ordering wrt previous active requests */
-	err = i915_request_await_active_request(rq, active);
-	if (err)
-		return err;
+	/* Must maintain timeline ordering wrt previous active requests */
+	rcu_read_lock();
+	fence = __i915_active_fence_set(active, &rq->fence);
+	if (fence) /* but the previous fence may not belong to that timeline! */
+		fence = dma_fence_get_rcu(fence);
+	rcu_read_unlock();
+	if (fence) {
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
 
-	__i915_active_request_set(active, rq);
-	return 0;
+	return err;
 }
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request)
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	/* Space left intentionally blank */
+	i915_active_fence_cb(fence, cb);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 90034f61b7c2..4f52fe6146d2 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -12,6 +12,10 @@
 #include "i915_active_types.h"
 #include "i915_request.h"
 
+struct i915_request;
+struct intel_engine_cs;
+struct intel_timeline;
+
 /*
  * We treat requests as fences. This is not to be confused with our
  * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
@@ -28,308 +32,108 @@
  * write access so that we can perform concurrent read operations between
  * the CPU and GPU engines, as well as waiting for all rendering to
  * complete, or waiting for the last GPU user of a "fence register". The
- * object then embeds a #i915_active_request to track the most recent (in
+ * object then embeds a #i915_active_fence to track the most recent (in
  * retirement order) request relevant for the desired mode of access.
- * The #i915_active_request is updated with i915_active_request_set() to
+ * The #i915_active_fence is updated with i915_active_fence_set() to
  * track the most recent fence request, typically this is done as part of
  * i915_vma_move_to_active().
  *
- * When the #i915_active_request completes (is retired), it will
+ * When the #i915_active_fence completes (is retired), it will
  * signal its completion to the owner through a callback as well as mark
- * itself as idle (i915_active_request.request == NULL). The owner
+ * itself as idle (i915_active_fence.fence == NULL). The owner
  * can then perform any action, such as delayed freeing of an active
  * resource including itself.
  */
 
-void i915_active_retire_noop(struct i915_active_request *active,
-			     struct i915_request *request);
+void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
 
 /**
- * i915_active_request_init - prepares the activity tracker for use
+ * __i915_active_fence_init - prepares the activity tracker for use
  * @active - the active tracker
- * @rq - initial request to track, can be NULL
+ * @fence - initial fence to track, can be NULL
  * @func - a callback when the tracker is retired (becomes idle),
  *         can be NULL
  *
- * i915_active_request_init() prepares the embedded @active struct for use as
- * an activity tracker, that is for tracking the last known active request
- * associated with it. When the last request becomes idle, when it is retired
+ * __i915_active_fence_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active fence
+ * associated with it. When the last fence becomes idle, when it is retired
  * after completion, the optional callback @func is invoked.
  */
 static inline void
-i915_active_request_init(struct i915_active_request *active,
+__i915_active_fence_init(struct i915_active_fence *active,
 			 struct mutex *lock,
-			 struct i915_request *rq,
-			 i915_active_retire_fn retire)
+			 void *fence,
+			 dma_fence_func_t fn)
 {
-	RCU_INIT_POINTER(active->request, rq);
-	INIT_LIST_HEAD(&active->link);
-	active->retire = retire ?: i915_active_retire_noop;
+	RCU_INIT_POINTER(active->fence, fence);
+	active->cb.func = fn ?: i915_active_noop;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	active->lock = lock;
 #endif
 }
 
-#define INIT_ACTIVE_REQUEST(name, lock) \
-	i915_active_request_init((name), (lock), NULL, NULL)
-
-/**
- * i915_active_request_set - updates the tracker to watch the current request
- * @active - the active tracker
- * @request - the request to watch
- *
- * __i915_active_request_set() watches the given @request for completion. Whilst
- * that @request is busy, the @active reports busy. When that @request is
- * retired, the @active tracker is updated to report idle.
- */
-static inline void
-__i915_active_request_set(struct i915_active_request *active,
-			  struct i915_request *request)
-{
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
-	lockdep_assert_held(active->lock);
-#endif
-	list_move(&active->link, &request->active_list);
-	rcu_assign_pointer(active->request, request);
-}
+#define INIT_ACTIVE_FENCE(A, LOCK) \
+	__i915_active_fence_init((A), (LOCK), NULL, NULL)
 
-int __must_check
-i915_active_request_set(struct i915_active_request *active,
-			struct i915_request *rq);
+struct dma_fence *
+__i915_active_fence_set(struct i915_active_fence *active,
+			struct dma_fence *fence);
 
 /**
- * i915_active_request_raw - return the active request
+ * i915_active_fence_set - updates the tracker to watch the current fence
  * @active - the active tracker
+ * @rq - the request to watch
  *
- * i915_active_request_raw() returns the current request being tracked, or NULL.
- * It does not obtain a reference on the request for the caller, so the caller
- * must hold struct_mutex.
+ * i915_active_fence_set() watches the given @rq for completion. While
+ * that @rq is busy, the @active reports busy. When that @rq is signaled
+ * (or else retired), the @active tracker is updated to report idle.
  */
-static inline struct i915_request *
-i915_active_request_raw(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return rcu_dereference_protected(active->request,
-					 lockdep_is_held(mutex));
-}
-
-/**
- * i915_active_request_peek - report the active request being monitored
- * @active - the active tracker
- *
- * i915_active_request_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_peek(const struct i915_active_request *active,
-			 struct mutex *mutex)
-{
-	struct i915_request *request;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request || i915_request_completed(request))
-		return NULL;
-
-	return request;
-}
-
-/**
- * i915_active_request_get - return a reference to the active request
- * @active - the active tracker
- *
- * i915_active_request_get() returns a reference to the active request, or NULL
- * if the active tracker is idle. The caller must hold struct_mutex.
- */
-static inline struct i915_request *
-i915_active_request_get(const struct i915_active_request *active,
-			struct mutex *mutex)
-{
-	return i915_request_get(i915_active_request_peek(active, mutex));
-}
-
-/**
- * __i915_active_request_get_rcu - return a reference to the active request
- * @active - the active tracker
- *
- * __i915_active_request_get() returns a reference to the active request,
- * or NULL if the active tracker is idle. The caller must hold the RCU read
- * lock, but the returned pointer is safe to use outside of RCU.
- */
-static inline struct i915_request *
-__i915_active_request_get_rcu(const struct i915_active_request *active)
-{
-	/*
-	 * Performing a lockless retrieval of the active request is super
-	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
-	 * slab of request objects will not be freed whilst we hold the
-	 * RCU read lock. It does not guarantee that the request itself
-	 * will not be freed and then *reused*. Viz,
-	 *
-	 * Thread A			Thread B
-	 *
-	 * rq = active.request
-	 *				retire(rq) -> free(rq);
-	 *				(rq is now first on the slab freelist)
-	 *				active.request = NULL
-	 *
-	 *				rq = new submission on a new object
-	 * ref(rq)
-	 *
-	 * To prevent the request from being reused whilst the caller
-	 * uses it, we take a reference like normal. Whilst acquiring
-	 * the reference we check that it is not in a destroyed state
-	 * (refcnt == 0). That prevents the request being reallocated
-	 * whilst the caller holds on to it. To check that the request
-	 * was not reallocated as we acquired the reference we have to
-	 * check that our request remains the active request across
-	 * the lookup, in the same manner as a seqlock. The visibility
-	 * of the pointer versus the reference counting is controlled
-	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
-	 *
-	 * In the middle of all that, we inspect whether the request is
-	 * complete. Retiring is lazy so the request may be completed long
-	 * before the active tracker is updated. Querying whether the
-	 * request is complete is far cheaper (as it involves no locked
-	 * instructions setting cachelines to exclusive) than acquiring
-	 * the reference, so we do it first. The RCU read lock ensures the
-	 * pointer dereference is valid, but does not ensure that the
-	 * seqno nor HWS is the right one! However, if the request was
-	 * reallocated, that means the active tracker's request was complete.
-	 * If the new request is also complete, then both are and we can
-	 * just report the active tracker is idle. If the new request is
-	 * incomplete, then we acquire a reference on it and check that
-	 * it remained the active request.
-	 *
-	 * It is then imperative that we do not zero the request on
-	 * reallocation, so that we can chase the dangling pointers!
-	 * See i915_request_alloc().
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_request_completed(request))
-			return NULL;
-
-		/*
-		 * An especially silly compiler could decide to recompute the
-		 * result of i915_request_completed, more specifically
-		 * re-emit the load for request->fence.seqno. A race would catch
-		 * a later seqno value, which could flip the result from true to
-		 * false. Which means part of the instructions below might not
-		 * be executed, while later on instructions are executed. Due to
-		 * barriers within the refcounting the inconsistency can't reach
-		 * past the call to i915_request_get_rcu, but not executing
-		 * that while still executing i915_request_put() creates
-		 * havoc enough.  Prevent this with a compiler barrier.
-		 */
-		barrier();
-
-		request = i915_request_get_rcu(request);
-
-		/*
-		 * What stops the following rcu_access_pointer() from occurring
-		 * before the above i915_request_get_rcu()? If we were
-		 * to read the value before pausing to get the reference to
-		 * the request, we may not notice a change in the active
-		 * tracker.
-		 *
-		 * The rcu_access_pointer() is a mere compiler barrier, which
-		 * means both the CPU and compiler are free to perform the
-		 * memory read without constraint. The compiler only has to
-		 * ensure that any operations after the rcu_access_pointer()
-		 * occur afterwards in program order. This means the read may
-		 * be performed earlier by an out-of-order CPU, or adventurous
-		 * compiler.
-		 *
-		 * The atomic operation at the heart of
-		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
-		 * atomic_inc_not_zero() which is only a full memory barrier
-		 * when successful. That is, if i915_request_get_rcu()
-		 * returns the request (and so with the reference counted
-		 * incremented) then the following read for rcu_access_pointer()
-		 * must occur after the atomic operation and so confirm
-		 * that this request is the one currently being tracked.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		if (!request || request == rcu_access_pointer(active->request))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
+int __must_check
+i915_active_fence_set(struct i915_active_fence *active,
+		      struct i915_request *rq);
 /**
- * i915_active_request_get_unlocked - return a reference to the active request
+ * i915_active_fence_get - return a reference to the active fence
  * @active - the active tracker
  *
- * i915_active_request_get_unlocked() returns a reference to the active request,
+ * i915_active_fence_get() returns a reference to the active fence,
  * or NULL if the active tracker is idle. The reference is obtained under RCU,
  * so no locking is required by the caller.
  *
- * The reference should be freed with i915_request_put().
+ * The reference should be freed with dma_fence_put().
  */
-static inline struct i915_request *
-i915_active_request_get_unlocked(const struct i915_active_request *active)
+static inline struct dma_fence *
+i915_active_fence_get(struct i915_active_fence *active)
 {
-	struct i915_request *request;
+	struct dma_fence *fence;
 
 	rcu_read_lock();
-	request = __i915_active_request_get_rcu(active);
+	fence = dma_fence_get_rcu_safe(&active->fence);
 	rcu_read_unlock();
 
-	return request;
+	return fence;
 }
 
 /**
- * i915_active_request_isset - report whether the active tracker is assigned
+ * i915_active_fence_isset - report whether the active tracker is assigned
  * @active - the active tracker
  *
- * i915_active_request_isset() returns true if the active tracker is currently
- * assigned to a request. Due to the lazy retiring, that request may be idle
+ * i915_active_fence_isset() returns true if the active tracker is currently
+ * assigned to a fence. Due to the lazy retiring, that fence may be idle
  * and this may report stale information.
  */
 static inline bool
-i915_active_request_isset(const struct i915_active_request *active)
+i915_active_fence_isset(const struct i915_active_fence *active)
 {
-	return rcu_access_pointer(active->request);
+	return rcu_access_pointer(active->fence);
 }
 
-/**
- * i915_active_request_retire - waits until the request is retired
- * @active - the active request on which to wait
- *
- * i915_active_request_retire() waits until the request is completed,
- * and then ensures that at least the retirement handler for this
- * @active tracker is called before returning. If the @active
- * tracker is idle, the function returns immediately.
- */
-static inline int __must_check
-i915_active_request_retire(struct i915_active_request *active,
-			   struct mutex *mutex)
+static inline void
+i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	struct i915_request *request;
-	long ret;
-
-	request = i915_active_request_raw(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	if (ret < 0)
-		return ret;
+	struct i915_active_fence *active =
+		container_of(cb, typeof(*active), cb);
 
-	list_del_init(&active->link);
-	RCU_INIT_POINTER(active->request, NULL);
-
-	active->retire(active, request);
-
-	return 0;
+	RCU_INIT_POINTER(active->fence, NULL);
 }
 
 /*
@@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active,
  * synchronisation.
  */
 
-void __i915_active_init(struct drm_i915_private *i915,
-			struct i915_active *ref,
+void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *key);
-#define i915_active_init(i915, ref, active, retire) do {		\
+#define i915_active_init(ref, active, retire) do {		\
 	static struct lock_class_key __key;				\
 									\
-	__i915_active_init(i915, ref, active, retire, &__key);		\
+	__i915_active_init(ref, active, retire, &__key);		\
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
 		    struct intel_timeline *tl,
-		    struct i915_request *rq);
+		    struct dma_fence *fence);
 
 static inline int
 i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 {
-	return i915_active_ref(ref, i915_request_timeline(rq), rq);
+	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
 }
 
 void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
 
 static inline bool i915_active_has_exclusive(struct i915_active *ref)
 {
-	return rcu_access_pointer(ref->excl);
+	return rcu_access_pointer(ref->excl.fence);
 }
 
 int i915_active_wait(struct i915_active *ref);
 
-int i915_request_await_active(struct i915_request *rq,
-			      struct i915_active *ref);
-int i915_request_await_active_request(struct i915_request *rq,
-				      struct i915_active_request *active);
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
 
 int i915_active_acquire(struct i915_active *ref);
+bool i915_active_acquire_if_busy(struct i915_active *ref);
 void i915_active_release(struct i915_active *ref);
-void __i915_active_release_nested(struct i915_active *ref, int subclass);
-
-bool i915_active_trygrab(struct i915_active *ref);
-void i915_active_ungrab(struct i915_active *ref);
 
 static inline bool
 i915_active_is_idle(const struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 021167f0004d..d89a74c142c6 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -17,17 +17,9 @@
 
 #include "i915_utils.h"
 
-struct drm_i915_private;
-struct i915_active_request;
-struct i915_request;
-
-typedef void (*i915_active_retire_fn)(struct i915_active_request *,
-				      struct i915_request *);
-
-struct i915_active_request {
-	struct i915_request __rcu *request;
-	struct list_head link;
-	i915_active_retire_fn retire;
+struct i915_active_fence {
+	struct dma_fence __rcu *fence;
+	struct dma_fence_cb cb;
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	/*
 	 * Incorporeal!
@@ -53,20 +45,17 @@ struct active_node;
 #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
 
 struct i915_active {
-	struct drm_i915_private *i915;
+	atomic_t count;
+	struct mutex mutex;
 
 	struct active_node *cache;
 	struct rb_root tree;
-	struct mutex mutex;
-	atomic_t count;
 
 	/* Preallocated "exclusive" node */
-	struct dma_fence __rcu *excl;
-	struct dma_fence_cb excl_cb;
+	struct i915_active_fence excl;
 
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
-#define I915_ACTIVE_GRAB_BIT 1
 
 	int (*active)(struct i915_active *ref);
 	void (*retire)(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f50058cf8ab8..64890627d638 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
 
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct i915_request *rq;
+		struct dma_fence *fence;
 
-		rq = i915_active_request_get_unlocked(&tl->last_request);
-		if (!rq)
+		fence = i915_active_fence_get(&tl->last_request);
+		if (!fence)
 			continue;
 
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
-		/*
-		 * "Race-to-idle".
-		 *
-		 * Switching to the kernel context is often used a synchronous
-		 * step prior to idling, e.g. in suspend for flushing all
-		 * current operations to memory before sleeping. These we
-		 * want to complete as quickly as possible to avoid prolonged
-		 * stalls, so allow the gpu to boost to maximum clocks.
-		 */
-		if (wait & I915_WAIT_FOR_IDLE_BOOST)
-			gen6_rps_boost(rq);
+		if (!dma_fence_is_i915(fence)) {
+			timeout = dma_fence_wait_timeout(fence,
+							 flags & I915_WAIT_INTERRUPTIBLE,
+							 timeout);
+		} else {
+			struct i915_request *rq = to_request(fence);
+
+			/*
+			 * "Race-to-idle".
+			 *
+			 * Switching to the kernel context is often used as
+			 * a synchronous step prior to idling, e.g. in suspend
+			 * for flushing all current operations to memory before
+			 * sleeping. These we want to complete as quickly as
+			 * possible to avoid prolonged stalls, so allow the gpu
+			 * to boost to maximum clocks.
+			 */
+			if (flags & I915_WAIT_FOR_IDLE_BOOST)
+				gen6_rps_boost(rq);
+
+			timeout = i915_request_wait(rq, flags, timeout);
+		}
 
-		timeout = i915_request_wait(rq, wait, timeout);
-		i915_request_put(rq);
+		dma_fence_put(fence);
 		if (timeout < 0)
 			return timeout;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 55cebf256d03..7462d87f7a48 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = {
 
 static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 {
-	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
 	struct i915_vma *vma;
 
@@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	i915_active_init(i915, &vma->active, NULL, NULL);
+	i915_active_init(&vma->active, NULL, NULL);
 
 	mutex_init(&vma->pages_mutex);
 	vma->vm = i915_vm_get(&ggtt->vm);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6384a06aa5bf..a28ee754b7b4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next,
 	if (!c)
 		return next;
 
-	if (!i915_active_trygrab(&vma->active)) {
+	if (!i915_active_acquire_if_busy(&vma->active)) {
 		kfree(c);
 		return next;
 	}
@@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 			*this->slot =
 				i915_error_object_create(i915, vma, compress);
 
-			i915_active_ungrab(&vma->active);
+			i915_active_release(&vma->active);
 			i915_vma_put(vma);
 
 			capture = this->next;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a8916412759b..4ffe62a42186 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq)
 
 static bool i915_request_retire(struct i915_request *rq)
 {
-	struct i915_active_request *active, *next;
-
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq)
 				  &i915_request_timeline(rq)->requests));
 	rq->ring->head = rq->postfix;
 
-	/*
-	 * Walk through the active list, calling retire on each. This allows
-	 * objects to track their GPU activity and mark themselves as idle
-	 * when their *last* active request is completed (updating state
-	 * tracking lists for eviction, active references for GEM, etc).
-	 *
-	 * As the ->retire() may free the node, we decouple it first and
-	 * pass along the auxiliary information (to avoid dereferencing
-	 * the node after the callback).
-	 */
-	list_for_each_entry_safe(active, next, &rq->active_list, link) {
-		/*
-		 * In microbenchmarks or focusing upon time inside the kernel,
-		 * we may spend an inordinate amount of time simply handling
-		 * the retirement of requests and processing their callbacks.
-		 * Of which, this loop itself is particularly hot due to the
-		 * cache misses when jumping around the list of
-		 * i915_active_request.  So we try to keep this loop as
-		 * streamlined as possible and also prefetch the next
-		 * i915_active_request to try and hide the likely cache miss.
-		 */
-		prefetchw(next);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, rq);
-	}
-
 	local_irq_disable();
 
 	/*
@@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->flags = 0;
 	rq->execution_mask = ALL_ENGINES;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
 
 	/*
@@ -743,7 +711,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
-	GEM_BUG_ON(!list_empty(&rq->active_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
@@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request,
-					 lockdep_is_held(&timeline->mutex));
+	prev = to_request(__i915_active_fence_set(&timeline->last_request,
+						  &rq->fence));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, rq);
 
 	return prev;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index ec5bb4c2e5ae..91a885c36c6b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -211,7 +211,6 @@ struct i915_request {
 	 * on the active_list (of their final request).
 	 */
 	struct i915_capture_list *capture_list;
-	struct list_head active_list;
 
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e191247c7c5f..9fdcd4e2c799 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
-	i915_active_init(vm->i915, &vma->active,
-			 __i915_vma_active, __i915_vma_retire);
+	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
 	if (ret)
 		return ret;
 
+	GEM_BUG_ON(i915_vma_is_active(vma));
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
 		return -EBUSY;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index a41785822ed9..2cc71bcf884f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
 		return NULL;
 
 	kref_init(&active->ref);
-	i915_active_init(i915, &active->base, __live_active, __live_retire);
+	i915_active_init(&active->base, __live_active, __live_retire);
 
 	return active;
 }
@@ -146,19 +146,13 @@ static int live_active_wait(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests retire upon waiting */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	i915_active_wait(&active->base);
 	if (!READ_ONCE(active->retired)) {
@@ -168,11 +162,9 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
@@ -182,23 +174,19 @@ static int live_active_retire(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct live_active *active;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/* Check that we get a callback when requests are indirectly retired */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	active = __live_active_setup(i915);
-	if (IS_ERR(active)) {
-		err = PTR_ERR(active);
-		goto err;
-	}
+	if (IS_ERR(active))
+		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
+	mutex_lock(&i915->drm.struct_mutex);
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
@@ -207,10 +195,6 @@ static int live_active_retire(void *arg)
 
 	__live_put(active);
 
-err:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
-- 
2.23.0
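
For orientation, a minimal sketch of how the reworked tracker is meant
to be used; the names come from the patch above, while the surrounding
timeline setup and error handling are elided, so treat it as an
illustration rather than a drop-in:

	/* Publish rq as the most recent user of the tracked resource. */
	static int mark_active(struct i915_active *ref, struct i915_request *rq)
	{
		/*
		 * The caller must already hold the request's timeline
		 * mutex; i915_active_ref() asserts as much via lockdep.
		 */
		return i915_active_add_request(ref, rq);
	}

	/* Lockless read side, e.g. against a timeline's last_request. */
	static void sync_with_last_user(struct i915_active_fence *active)
	{
		struct dma_fence *fence;

		fence = i915_active_fence_get(active); /* RCU-safe acquire */
		if (!fence)
			return; /* tracker is idle */

		dma_fence_wait(fence, false);
		dma_fence_put(fence); /* pairs with i915_active_fence_get() */
	}

Note that i915_active_wait() no longer retires requests on behalf of
the caller: it enables software signaling on each tracked fence and
then sleeps via wait_var_event_interruptible() until the final
__active_retire() calls wake_up_var().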


* [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (6 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
                   ` (26 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Now that we no longer need to guarantee that the active callback runs
under the struct_mutex, we can lift it out of i915_gem_park() and into
the engine parking itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c    | 19 -------------------
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 15 +++++++++++++++
 drivers/gpu/drm/i915/i915_active.c        |  1 +
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 0a4115c6c275..5180b2ee1cb7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -11,29 +11,10 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 
-static void call_idle_barriers(struct intel_engine_cs *engine)
-{
-	struct llist_node *node, *next;
-
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
-		struct dma_fence_cb *cb =
-			container_of((struct list_head *)node,
-				     typeof(*cb), node);
-
-		cb->func(NULL, cb);
-	}
-}
-
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
-		call_idle_barriers(engine); /* cleanup after wedging */
-
 	i915_vma_parked(i915);
 
 	i915_globals_park();
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 66fc49b76ea8..8e5e513eddc9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -124,6 +124,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	return result;
 }
 
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct dma_fence_cb *cb =
+			container_of((struct list_head *)node,
+				     typeof(*cb), node);
+
+		cb->func(NULL, cb);
+	}
+}
+
 static int __engine_park(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
@@ -143,6 +156,8 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	GEM_TRACE("%s\n", engine->name);
 
+	call_idle_barriers(engine); /* cleanup after wedging */
+
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 023652ded4be..aa37c07004b9 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -693,6 +693,7 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		rb_link_node(&node->node, parent, p);
 		rb_insert_color(&node->node, &ref->tree);
 
+		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
 		intel_engine_pm_put(engine);
 	}
-- 
2.23.0
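
The container_of() in call_idle_barriers() works because each barrier
entry was queued by casting the list_head embedded in a dma_fence_cb
to an llist_node (see barrier_to_ll() in the previous patch), and
llist only ever touches the leading next pointer of that storage. A
standalone sketch of the layout assumption:

	#include <linux/dma-fence.h>
	#include <linux/kernel.h>
	#include <linux/list.h>
	#include <linux/llist.h>

	/*
	 * struct list_head begins with 'next', aliasing llist_node, so a
	 * node popped off the llist maps straight back to the dma_fence_cb
	 * in which that list_head is embedded.
	 */
	static struct dma_fence_cb *cb_from_llist(struct llist_node *node)
	{
		return container_of((struct list_head *)node,
				    struct dma_fence_cb, node);
	}

Invoking cb->func(NULL, cb) then runs each retirement callback with a
NULL fence, which the i915_active callbacks tolerate as they only use
the callback pointer for their i915_active_fence_cb() bookkeeping.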


* [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests()
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (7 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 11/30] drm/i915: Remove the GEM idle worker Chris Wilson
                   ` (25 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need to hold struct_mutex while retiring requests, so drop
it from i915_retire_requests() and i915_gem_wait_for_idle(), finally
removing I915_WAIT_LOCKED for good.
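
i915_gem_wait_for_idle() thereby becomes a lockless wait-then-retire
loop: retiring one batch of requests can queue more work (e.g. idle
barriers), so we repeat until retiring reports no progress. Roughly, as
a self-contained user-space sketch (wait_for_timelines() and
retire_requests() below are toy stand-ins, and the cond_resched() plus
signal check of the real loop is reduced to a comment):

#include <stdbool.h>
#include <stdio.h>

static int in_flight = 2;           /* requests currently queued */
static bool barrier_pending = true; /* retiring will queue one more */

static int wait_for_timelines(void)
{
        printf("waiting on %d request(s)\n", in_flight);
        in_flight = 0; /* toy model: waiting drains whatever is queued */
        return 0;
}

/* returns true while retiring makes progress (and may queue more work) */
static bool retire_requests(void)
{
        if (barrier_pending) {
                barrier_pending = false;
                in_flight = 1; /* e.g. an idle barrier request */
                return true;
        }
        return false;
}

static int wait_for_idle(void)
{
        do {
                int err = wait_for_timelines();
                if (err)
                        return err;
                /* real loop: cond_resched() and bail out on a signal */
        } while (retire_requests());

        return 0;
}

int main(void)
{
        return wait_for_idle();
}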

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |   7 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  20 +--
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  45 ++----
 .../i915/gem/selftests/i915_gem_coherency.c   |  40 +++--
 .../drm/i915/gem/selftests/i915_gem_context.c |  18 +--
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   6 +-
 .../i915/gem/selftests/i915_gem_object_blt.c  |   4 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  28 +---
 drivers/gpu/drm/i915/gt/selftest_context.c    |   4 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  89 ++---------
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |  23 ++-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  91 +++++------
 .../gpu/drm/i915/gt/selftest_workarounds.c    |   6 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  42 ++---
 drivers/gpu/drm/i915/i915_gem.c               |  19 +--
 drivers/gpu/drm/i915/i915_request.h           |   7 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c | 151 +++++-------------
 .../gpu/drm/i915/selftests/i915_selftest.c    |   8 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   4 -
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  30 ++--
 .../gpu/drm/i915/selftests/igt_flush_test.h   |   2 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |   9 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   4 -
 26 files changed, 213 insertions(+), 460 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index c1fca5728e6e..81366aa4812b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence,
 static void clear_pages_worker(struct work_struct *work)
 {
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
-	struct drm_i915_private *i915 = w->ce->engine->i915;
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
 	struct i915_request *rq;
@@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	/* XXX: we need to kill this */
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
 	if (unlikely(err))
-		goto out_unlock;
+		goto out_signal;
 
 	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
 	if (IS_ERR(batch)) {
@@ -229,8 +226,6 @@ static void clear_pages_worker(struct work_struct *work)
 	intel_emit_vma_release(w->ce, batch);
 out_unpin:
 	i915_vma_unpin(vma);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 out_signal:
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9d85aab68d34..0ab416887fc2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1159,8 +1159,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 }
 
 static int
-__intel_context_reconfigure_sseu(struct intel_context *ce,
-				 struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
 	int ret;
 
@@ -1183,23 +1182,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
 	return ret;
 }
 
-static int
-intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
-{
-	struct drm_i915_private *i915 = ce->engine->i915;
-	int ret;
-
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return ret;
-}
-
 static int
 user_to_context_sseu(struct drm_i915_private *i915,
 		     const struct drm_i915_gem_context_param_sseu *user,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 5180b2ee1cb7..2ddc3aeaac9d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -48,11 +48,7 @@ static void retire_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.retire_work.work);
 
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&i915->drm.struct_mutex)) {
-		i915_retire_requests(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
+	i915_retire_requests(i915);
 
 	queue_delayed_work(i915->wq,
 			   &i915->gem.retire_work,
@@ -86,26 +82,23 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_FOR_IDLE_BOOST,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-			result = false;
+	if (i915_gem_wait_for_idle(gt->i915,
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/* XXX hide warning from gem_eio */
+		if (i915_modparams.reset) {
+			dev_err(gt->i915->drm.dev,
+				"Failed to idle engines, declaring wedged!\n");
+			GEM_TRACE_DUMP();
 		}
-	} while (i915_retire_requests(gt->i915) && result);
+
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+		result = false;
+	}
 
 	if (intel_gt_pm_wait_for_idle(gt))
 		result = false;
@@ -145,8 +138,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 
 	user_forcewake(&i915->gt, true);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	/*
 	 * We have to flush all the executing contexts to main memory so
 	 * that they can saved in the hibernation image. To ensure the last
@@ -158,8 +149,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 */
 	switch_to_kernel_context_sync(&i915->gt);
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 0ff7a89aadca..549810f70aeb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
 
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
@@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
 	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
@@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	iowrite32(v, &map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
 static int gtt_get(struct drm_i915_gem_object *obj,
@@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	u32 __iomem *map;
-	int err;
+	int err = 0;
 
 	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	intel_gt_pm_get(vma->vm->gt);
+
 	map = i915_vma_pin_iomap(vma);
 	i915_vma_unpin(vma);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
+	if (IS_ERR(map)) {
+		err = PTR_ERR(map);
+		goto out_rpm;
+	}
 
 	*v = ioread32(&map[offset / sizeof(*map)]);
 	i915_vma_unpin_iomap(vma);
 
-	return 0;
+out_rpm:
+	intel_gt_pm_put(vma->vm->gt);
+	return err;
 }
 
 static int wc_set(struct drm_i915_gem_object *obj,
@@ -280,7 +293,6 @@ static int igt_gem_coherency(void *arg)
 	struct drm_i915_private *i915 = arg;
 	const struct igt_coherency_mode *read, *write, *over;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	u32 *offsets, *values;
 	int err = 0;
@@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg)
 
 	values = offsets + ncachelines;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg)
 					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 					if (IS_ERR(obj)) {
 						err = PTR_ERR(obj);
-						goto unlock;
+						goto free;
 					}
 
 					i915_random_reorder(offsets, ncachelines, &prng);
@@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg)
 			}
 		}
 	}
-unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+free:
 	kfree(offsets);
 	return err;
 
 put_object:
 	i915_gem_object_put(obj);
-	goto unlock;
+	goto free;
 }
 
 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 8eba0d3a31de..f5402aad9b5a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -164,7 +164,6 @@ struct parallel_switch {
 static int __live_parallel_switch1(void *data)
 {
 	struct parallel_switch *arg = data;
-	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
 	IGT_TIMEOUT(end_time);
 	unsigned long count;
 
@@ -176,16 +175,12 @@ static int __live_parallel_switch1(void *data)
 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
 			i915_request_put(rq);
 
-			mutex_lock(&i915->drm.struct_mutex);
 			rq = i915_request_create(arg->ce[n]);
-			if (IS_ERR(rq)) {
-				mutex_unlock(&i915->drm.struct_mutex);
+			if (IS_ERR(rq))
 				return PTR_ERR(rq);
-			}
 
 			i915_request_get(rq);
 			i915_request_add(rq);
-			mutex_unlock(&i915->drm.struct_mutex);
 		}
 
 		err = 0;
@@ -205,7 +200,6 @@ static int __live_parallel_switch1(void *data)
 static int __live_parallel_switchN(void *data)
 {
 	struct parallel_switch *arg = data;
-	struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
 	IGT_TIMEOUT(end_time);
 	unsigned long count;
 	int n;
@@ -215,15 +209,11 @@ static int __live_parallel_switchN(void *data)
 		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
 			struct i915_request *rq;
 
-			mutex_lock(&i915->drm.struct_mutex);
 			rq = i915_request_create(arg->ce[n]);
-			if (IS_ERR(rq)) {
-				mutex_unlock(&i915->drm.struct_mutex);
+			if (IS_ERR(rq))
 				return PTR_ERR(rq);
-			}
 
 			i915_request_add(rq);
-			mutex_unlock(&i915->drm.struct_mutex);
 		}
 
 		count++;
@@ -1173,7 +1163,7 @@ __sseu_test(const char *name,
 	if (ret)
 		return ret;
 
-	ret = __intel_context_reconfigure_sseu(ce, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
@@ -1277,7 +1267,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 		goto out_fail;
 
 out_fail:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		ret = -EIO;
 
 	intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 36aca1c172e7..856b8e467ee8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -581,12 +581,8 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 
 static void restore_retire_worker(struct drm_i915_private *i915)
 {
+	igt_flush_test(i915);
 	intel_gt_pm_put(&i915->gt);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_driver_register__shrinker(i915);
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index c21d747e7d05..9ec55b3a3815 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg)
 		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			obj->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_fill_blt(obj, ce, val);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
@@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg)
 		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 			dst->cache_dirty = true;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = i915_gem_object_copy_blt(src, dst, ce);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 29fa1dabbc2e..d4cefdd38431 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -196,26 +196,14 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	mutex_lock(&gt->i915->drm.struct_mutex); /* XXX */
-	do {
-		if (i915_gem_wait_for_idle(gt->i915,
-					   I915_WAIT_LOCKED,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(gt->i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			intel_gt_set_wedged(gt);
-		}
-	} while (i915_retire_requests(gt->i915));
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+	if (i915_gem_wait_for_idle(gt->i915, 0,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		/*
+		 * Forcibly cancel outstanding work and leave
+		 * the gpu quiet.
+		 */
+		intel_gt_set_wedged(gt);
+	}
 
 	intel_gt_pm_wait_for_idle(gt);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index e6bcbe7ab5e1..86cffbb0a9cb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -318,7 +318,7 @@ static int live_active_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -431,7 +431,7 @@ static int live_remote_context(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index d3bee9f88008..ffbb3d23b887 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -58,7 +58,9 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
 	memset(h, 0, sizeof(*h));
 	h->gt = gt;
 
+	mutex_lock(&gt->i915->drm.struct_mutex);
 	h->ctx = kernel_context(gt->i915);
+	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(h->ctx))
 		return PTR_ERR(h->ctx);
 
@@ -285,7 +287,7 @@ static void hang_fini(struct hang *h)
 
 	kernel_context_close(h->ctx);
 
-	igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+	igt_flush_test(h->gt->i915);
 }
 
 static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -309,10 +311,9 @@ static int igt_hang_sanitycheck(void *arg)
 
 	/* Basic check that we can execute our hanging batch */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	for_each_engine(engine, gt->i915, id) {
 		struct intel_wedge_me w;
@@ -355,8 +356,6 @@ static int igt_hang_sanitycheck(void *arg)
 
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -395,8 +394,6 @@ static int igt_reset_nop(void *arg)
 	reset_count = i915_reset_count(global);
 	count = 0;
 	do {
-		mutex_lock(&gt->i915->drm.struct_mutex);
-
 		for_each_engine(engine, gt->i915, id) {
 			int i;
 
@@ -417,7 +414,6 @@ static int igt_reset_nop(void *arg)
 		intel_gt_reset(gt, ALL_ENGINES, NULL);
 		igt_global_reset_unlock(gt);
 
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (intel_gt_is_wedged(gt)) {
 			err = -EIO;
 			break;
@@ -429,16 +425,13 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	} while (time_before(jiffies, end_time));
 	pr_info("%s: %d resets\n", __func__, count);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -494,7 +487,6 @@ static int igt_reset_nop_engine(void *arg)
 				break;
 			}
 
-			mutex_lock(&gt->i915->drm.struct_mutex);
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
@@ -507,7 +499,6 @@ static int igt_reset_nop_engine(void *arg)
 				i915_request_add(rq);
 			}
 			err = intel_engine_reset(engine, NULL);
-			mutex_unlock(&gt->i915->drm.struct_mutex);
 			if (err) {
 				pr_err("i915_reset_engine failed\n");
 				break;
@@ -533,15 +524,12 @@ static int igt_reset_nop_engine(void *arg)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
+	err = igt_flush_test(gt->i915);
 out:
 	mock_file_free(gt->i915, file);
 	if (intel_gt_is_wedged(gt))
@@ -563,9 +551,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		return 0;
 
 	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 	}
@@ -593,17 +579,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 			if (active) {
 				struct i915_request *rq;
 
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -647,7 +630,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 		if (err)
 			break;
 
-		err = igt_flush_test(gt->i915, 0);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -655,11 +638,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (active) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (active)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -741,10 +721,8 @@ static int active_engine(void *data)
 		struct i915_request *old = rq[idx];
 		struct i915_request *new;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		new = igt_request_alloc(ctx[idx], engine);
 		if (IS_ERR(new)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
 			break;
 		}
@@ -755,7 +733,6 @@ static int active_engine(void *data)
 
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		err = active_request_put(old);
 		if (err)
@@ -795,9 +772,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		return 0;
 
 	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
 		err = hang_init(&h, gt);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -855,17 +830,14 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			struct i915_request *rq = NULL;
 
 			if (flags & TEST_ACTIVE) {
-				mutex_lock(&gt->i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					mutex_unlock(&gt->i915->drm.struct_mutex);
 					break;
 				}
 
 				i915_request_get(rq);
 				i915_request_add(rq);
-				mutex_unlock(&gt->i915->drm.struct_mutex);
 
 				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -977,9 +949,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 		if (err)
 			break;
 
-		mutex_lock(&gt->i915->drm.struct_mutex);
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -987,11 +957,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
 
-	if (flags & TEST_ACTIVE) {
-		mutex_lock(&gt->i915->drm.struct_mutex);
+	if (flags & TEST_ACTIVE)
 		hang_fini(&h);
-		mutex_unlock(&gt->i915->drm.struct_mutex);
-	}
 
 	return err;
 }
@@ -1061,7 +1028,6 @@ static int igt_reset_wait(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1109,7 +1075,6 @@ static int igt_reset_wait(void *arg)
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1189,10 +1154,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 
 	/* Check that we can recover an unbind stuck on a hanging request */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
-		goto unlock;
+		return err;
 
 	obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
 	if (IS_ERR(obj)) {
@@ -1255,8 +1219,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 	if (err)
 		goto out_rq;
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (!wait_until_running(&h, rq)) {
 		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 
@@ -1305,16 +1267,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
 		put_task_struct(tsk);
 	}
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 out_rq:
 	i915_request_put(rq);
 out_obj:
 	i915_gem_object_put(obj);
 fini:
 	hang_fini(&h);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	if (intel_gt_is_wedged(gt))
 		return -EIO;
 
@@ -1396,7 +1354,6 @@ static int igt_reset_queue(void *arg)
 
 	igt_global_reset_lock(gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	err = hang_init(&h, gt);
 	if (err)
 		goto unlock;
@@ -1511,7 +1468,7 @@ static int igt_reset_queue(void *arg)
 
 		i915_request_put(prev);
 
-		err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(gt->i915);
 		if (err)
 			break;
 	}
@@ -1519,7 +1476,6 @@ static int igt_reset_queue(void *arg)
 fini:
 	hang_fini(&h);
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	if (intel_gt_is_wedged(gt))
@@ -1546,11 +1502,9 @@ static int igt_handle_error(void *arg)
 	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	err = hang_init(&h, gt);
 	if (err)
-		goto err_unlock;
+		return err;
 
 	rq = hang_create_request(&h, engine);
 	if (IS_ERR(rq)) {
@@ -1574,8 +1528,6 @@ static int igt_handle_error(void *arg)
 		goto err_request;
 	}
 
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	/* Temporarily disable error capture */
 	error = xchg(&global->first_error, (void *)-1);
 
@@ -1583,8 +1535,6 @@ static int igt_handle_error(void *arg)
 
 	xchg(&global->first_error, error);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	if (rq->fence.error != -EIO) {
 		pr_err("Guilty request not identified!\n");
 		err = -EINVAL;
@@ -1595,8 +1545,6 @@ static int igt_handle_error(void *arg)
 	i915_request_put(rq);
 err_fini:
 	hang_fini(&h);
-err_unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1689,7 +1637,6 @@ static int igt_reset_engines_atomic(void *arg)
 		return 0;
 
 	igt_global_reset_lock(gt);
-	mutex_lock(&gt->i915->drm.struct_mutex);
 
 	/* Flush any requests before we get started and check basics */
 	if (!igt_force_reset(gt))
@@ -1709,9 +1656,7 @@ static int igt_reset_engines_atomic(void *arg)
 out:
 	/* As we poke around the guts, do a full reset before continuing. */
 	igt_force_reset(gt);
-
 unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	igt_global_reset_unlock(gt);
 
 	return err;
@@ -1751,10 +1696,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	igt_flush_test(gt->i915, I915_WAIT_LOCKED);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
-
 	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 01d5a26b9aa7..7d3d9c53c416 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -61,7 +61,7 @@ static int live_sanitycheck(void *arg)
 		}
 
 		igt_spinner_end(&spin);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx;
 		}
@@ -388,8 +388,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
 	if (err)
 		goto out;
 
-	if (i915_request_wait(head,
-			      I915_WAIT_LOCKED,
+	if (i915_request_wait(head, 0,
 			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
 		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
 		       count, n);
@@ -461,7 +460,7 @@ static int live_timeslice_preempt(void *arg)
 			if (err)
 				goto err_pin;
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			if (igt_flush_test(i915)) {
 				err = -EIO;
 				goto err_pin;
 			}
@@ -1014,7 +1013,7 @@ static int live_nopreempt(void *arg)
 			goto err_wedged;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1079,7 +1078,7 @@ static int live_suppress_self_preempt(void *arg)
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 
 		intel_engine_pm_get(engine);
@@ -1140,7 +1139,7 @@ static int live_suppress_self_preempt(void *arg)
 		}
 
 		intel_engine_pm_put(engine);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		if (igt_flush_test(i915))
 			goto err_wedged;
 	}
 
@@ -1301,7 +1300,7 @@ static int live_suppress_wait_preempt(void *arg)
 			for (i = 0; i < ARRAY_SIZE(client); i++)
 				igt_spinner_end(&client[i].spin);
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			if (igt_flush_test(i915))
 				goto err_wedged;
 
 			if (engine->execlists.preempt_hang.count) {
@@ -1580,7 +1579,7 @@ static int live_preempt_hang(void *arg)
 
 		igt_spinner_end(&spin_hi);
 		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(i915)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1977,7 +1976,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		prime, div64_u64(ktime_to_ns(times[1]), prime));
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
@@ -2122,7 +2121,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 		goto out;
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < nsibling; n++)
@@ -2300,7 +2299,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 out:
 	for (n = 0; !IS_ERR(rq[n]); n++)
 		i915_request_put(rq[n]);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	kernel_context_close(ctx);
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..16abfabf08c7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,7 +6,7 @@
 
 #include <linux/prime_numbers.h>
 
-#include "gem/i915_gem_pm.h"
+#include "intel_engine_pm.h"
 #include "intel_gt.h"
 
 #include "../selftests/i915_random.h"
@@ -136,7 +136,6 @@ static int mock_hwsp_freelist(void *arg)
 		goto err_put;
 	}
 
-	mutex_lock(&state.i915->drm.struct_mutex);
 	for (p = phases; p->name; p++) {
 		pr_debug("%s(%s)\n", __func__, p->name);
 		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
@@ -149,7 +148,6 @@ static int mock_hwsp_freelist(void *arg)
 out:
 	for (na = 0; na < state.max; na++)
 		__mock_hwsp_record(&state, na, NULL);
-	mutex_unlock(&state.i915->drm.struct_mutex);
 	kfree(state.history);
 err_put:
 	drm_dev_put(&state.i915->drm);
@@ -449,8 +447,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	struct i915_request *rq;
 	int err;
 
-	lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
 	err = intel_timeline_pin(tl);
 	if (err) {
 		rq = ERR_PTR(err);
@@ -461,10 +457,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
 	if (IS_ERR(rq))
 		goto out_unpin;
 
+	i915_request_get(rq);
+
 	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
 	i915_request_add(rq);
-	if (err)
+	if (err) {
+		i915_request_put(rq);
 		rq = ERR_PTR(err);
+	}
 
 out_unpin:
 	intel_timeline_unpin(tl);
@@ -500,7 +500,6 @@ static int live_hwsp_engine(void *arg)
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -515,14 +514,13 @@ static int live_hwsp_engine(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for_each_engine(engine, i915, id) {
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		for (n = 0; n < NUM_TIMELINES; n++) {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
@@ -530,22 +528,26 @@ static int live_hwsp_engine(void *arg)
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
+
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
 	}
 
-out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -559,11 +561,7 @@ static int live_hwsp_engine(void *arg)
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -575,7 +573,6 @@ static int live_hwsp_alternate(void *arg)
 	struct intel_timeline **timelines;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count, n;
 	int err = 0;
 
@@ -591,9 +588,6 @@ static int live_hwsp_alternate(void *arg)
 	if (!timelines)
 		return -ENOMEM;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for (n = 0; n < NUM_TIMELINES; n++) {
 		for_each_engine(engine, i915, id) {
@@ -605,11 +599,14 @@ static int live_hwsp_alternate(void *arg)
 
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
+				intel_engine_pm_put(engine);
 				err = PTR_ERR(tl);
 				goto out;
 			}
 
+			intel_engine_pm_get(engine);
 			rq = tl_write(tl, engine, count);
+			intel_engine_pm_put(engine);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
@@ -617,11 +614,12 @@ static int live_hwsp_alternate(void *arg)
 			}
 
 			timelines[count++] = tl;
+			i915_request_put(rq);
 		}
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	for (n = 0; n < count; n++) {
@@ -635,11 +633,7 @@ static int live_hwsp_alternate(void *arg)
 		intel_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	kvfree(timelines);
-
 	return err;
 #undef NUM_TIMELINES
 }
@@ -650,7 +644,6 @@ static int live_hwsp_wrap(void *arg)
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = 0;
 
 	/*
@@ -658,14 +651,10 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	tl = intel_timeline_create(&i915->gt, NULL);
-	if (IS_ERR(tl)) {
-		err = PTR_ERR(tl);
-		goto out_rpm;
-	}
+	if (IS_ERR(tl))
+		return PTR_ERR(tl);
+
 	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
 		goto out_free;
 
@@ -681,7 +670,9 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
 		rq = i915_request_create(engine->kernel_context);
+		intel_engine_pm_put(engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
@@ -747,16 +738,12 @@ static int live_hwsp_wrap(void *arg)
 	}
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	intel_timeline_unpin(tl);
 out_free:
 	intel_timeline_put(tl);
-out_rpm:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -765,7 +752,6 @@ static int live_hwsp_recycle(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	unsigned long count;
 	int err = 0;
 
@@ -775,9 +761,6 @@ static int live_hwsp_recycle(void *arg)
 	 * want to confuse ourselves or the GPU.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	count = 0;
 	for_each_engine(engine, i915, id) {
 		IGT_TIMEOUT(end_time);
@@ -785,6 +768,8 @@ static int live_hwsp_recycle(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		intel_engine_pm_get(engine);
+
 		do {
 			struct intel_timeline *tl;
 			struct i915_request *rq;
@@ -792,21 +777,22 @@ static int live_hwsp_recycle(void *arg)
 			tl = checked_intel_timeline_create(i915);
 			if (IS_ERR(tl)) {
 				err = PTR_ERR(tl);
-				goto out;
+				break;
 			}
 
 			rq = tl_write(tl, engine, count);
 			if (IS_ERR(rq)) {
 				intel_timeline_put(tl);
 				err = PTR_ERR(rq);
-				goto out;
+				break;
 			}
 
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Wait for timeline writes timed out!\n");
+				i915_request_put(rq);
 				intel_timeline_put(tl);
 				err = -EIO;
-				goto out;
+				break;
 			}
 
 			if (*tl->hwsp_seqno != count) {
@@ -815,17 +801,18 @@ static int live_hwsp_recycle(void *arg)
 				err = -EINVAL;
 			}
 
+			i915_request_put(rq);
 			intel_timeline_put(tl);
 			count++;
 
 			if (err)
-				goto out;
+				break;
 		} while (!__igt_timeout(end_time, NULL));
-	}
 
-out:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+		intel_engine_pm_put(engine);
+		if (err)
+			break;
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d40ce0709bff..4ee2e2babd0d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -676,7 +676,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 			break;
 	}
 
-	if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(ctx->i915))
 		err = -EIO;
 out_batch:
 	i915_vma_unpin_and_release(&batch, 0);
@@ -1090,7 +1090,7 @@ static int live_isolated_whitelist(void *arg)
 		kernel_context_close(client[i].ctx);
 	}
 
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 
 	return err;
@@ -1248,7 +1248,7 @@ live_engine_reset_workarounds(void *arg)
 	igt_global_reset_unlock(&i915->gt);
 	kernel_context_close(ctx);
 
-	igt_flush_test(i915, I915_WAIT_LOCKED);
+	igt_flush_test(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index fec9fb7cc384..385289895107 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3621,6 +3621,7 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
@@ -3630,40 +3631,21 @@ i915_drop_caches_set(void *data, u64 val)
 		     I915_IDLE_ENGINES_TIMEOUT))
 		intel_gt_set_wedged(&i915->gt);
 
-	/* No need to check and wait for gpu resets, only libdrm auto-restarts
-	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) {
-		int ret;
+	if (val & DROP_RETIRE)
+		i915_retire_requests(i915);
 
-		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (val & (DROP_IDLE | DROP_ACTIVE)) {
+		ret = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
+	}
 
-		/*
-		 * To finish the flush of the idle_worker, we must complete
-		 * the switch-to-kernel-context, which requires a double
-		 * pass through wait_for_idle: first queues the switch,
-		 * second waits for the switch.
-		 */
-		if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE))
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_INTERRUPTIBLE |
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-
-		if (val & DROP_RETIRE)
-			i915_retire_requests(i915);
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		if (ret == 0 && val & DROP_IDLE)
-			ret = intel_gt_pm_wait_for_idle(&i915->gt);
+	if (val & DROP_IDLE) {
+		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		if (ret)
+			return ret;
 	}
 
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 64890627d638..e4c553d9aa07 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -945,19 +945,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
-		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
-
-	timeout = wait_for_timelines(gt, flags, timeout);
-	if (timeout < 0)
-		return timeout;
+	do {
+		timeout = wait_for_timelines(gt, flags, timeout);
+		if (timeout < 0)
+			return timeout;
 
-	if (flags & I915_WAIT_LOCKED) {
-		lockdep_assert_held(&i915->drm.struct_mutex);
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
 
-		i915_retire_requests(i915);
-	}
+	} while (i915_retire_requests(i915));
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 91a885c36c6b..621fb33cda30 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -308,10 +308,9 @@ long i915_request_wait(struct i915_request *rq,
 		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
-#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
-#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
-#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
+#define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 2cc71bcf884f..268192b5613b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -162,10 +162,8 @@ static int live_active_wait(void *arg)
 
 	__live_put(active);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
 }
@@ -183,10 +181,8 @@ static int live_active_retire(void *arg)
 		return PTR_ERR(active);
 
 	/* waits for & retires all requests */
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (!READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing!\n");
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 75a4695b82bb..52d2df843148 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -523,7 +523,7 @@ static int igt_evict_contexts(void *arg)
 
 	mutex_lock(&i915->ggtt.vm.mutex);
 out_locked:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	while (reserved) {
 		struct reserved *next = reserved->next;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 373fbd58a17a..9939dd40c6a8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1703,12 +1703,8 @@ int i915_gem_gtt_mock_selftests(void)
 
 	err = i915_subtests(tests, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
@@ -2002,7 +1998,7 @@ static int igt_cs_tlb(void *arg)
 		}
 	}
 end:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
 	i915_gem_context_unlock_engines(ctx);
 	i915_gem_object_unpin_map(out);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index eb175da48547..d7d68c6a6bd5 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -41,21 +41,16 @@ static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct i915_request *request;
-	int err = -ENOMEM;
 
 	/* Basic preliminary test to create a request and let it loose! */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
 	if (!request)
-		goto out_unlock;
+		return -ENOMEM;
 
 	i915_request_add(request);
 
-	err = 0;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int igt_wait_request(void *arg)
@@ -67,12 +62,10 @@ static int igt_wait_request(void *arg)
 
 	/* Submit a request, then wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_unlock;
-	}
+	if (!request)
+		return -ENOMEM;
+
 	i915_request_get(request);
 
 	if (i915_request_wait(request, 0, 0) != -ETIME) {
@@ -125,9 +118,7 @@ static int igt_wait_request(void *arg)
 	err = 0;
 out_request:
 	i915_request_put(request);
-out_unlock:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -140,52 +131,45 @@ static int igt_fence_wait(void *arg)
 
 	/* Submit a request, treat it as a fence and wait upon it */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
-	if (!request) {
-		err = -ENOMEM;
-		goto out_locked;
-	}
+	if (!request)
+		return -ENOMEM;
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
 		pr_err("fence wait success before submit (expected timeout)!\n");
-		goto out_locked;
+		goto out;
 	}
 
 	i915_request_add(request);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence signaled immediately!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
 		pr_err("fence wait success after submit (expected timeout)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (!dma_fence_is_signaled(&request->fence)) {
 		pr_err("fence unsignaled after waiting!\n");
-		goto out_device;
+		goto out;
 	}
 
 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
 		pr_err("fence wait timed out when complete (expected success)!\n");
-		goto out_device;
+		goto out;
 	}
 
 	err = 0;
-out_device:
-	mutex_lock(&i915->drm.struct_mutex);
-out_locked:
+out:
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -199,6 +183,8 @@ static int igt_request_rewind(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ctx[0] = mock_context(i915, "A");
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	request = mock_request(ce, 2 * HZ);
@@ -211,7 +197,10 @@ static int igt_request_rewind(void *arg)
 	i915_request_get(request);
 	i915_request_add(request);
 
+	mutex_lock(&i915->drm.struct_mutex);
 	ctx[1] = mock_context(i915, "B");
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
 	vip = mock_request(ce, 0);
@@ -233,7 +222,6 @@ static int igt_request_rewind(void *arg)
 	request->engine->submit_request(request);
 	rcu_read_unlock();
 
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 		pr_err("timed out waiting for high priority request\n");
@@ -248,14 +236,12 @@ static int igt_request_rewind(void *arg)
 	err = 0;
 err:
 	i915_request_put(vip);
-	mutex_lock(&i915->drm.struct_mutex);
 err_context_1:
 	mock_context_close(ctx[1]);
 	i915_request_put(request);
 err_context_0:
 	mock_context_close(ctx[0]);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -282,7 +268,6 @@ __live_request_alloc(struct intel_context *ce)
 static int __igt_breadcrumbs_smoketest(void *arg)
 {
 	struct smoketest *t = arg;
-	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
 	const unsigned int total = 4 * t->ncontexts + 1;
 	unsigned int num_waits = 0, num_fences = 0;
@@ -337,14 +322,11 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			struct i915_request *rq;
 			struct intel_context *ce;
 
-			mutex_lock(BKL);
-
 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
 			GEM_BUG_ON(IS_ERR(ce));
 			rq = t->request_alloc(ce);
 			intel_context_put(ce);
 			if (IS_ERR(rq)) {
-				mutex_unlock(BKL);
 				err = PTR_ERR(rq);
 				count = n;
 				break;
@@ -357,8 +339,6 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 			requests[n] = i915_request_get(rq);
 			i915_request_add(rq);
 
-			mutex_unlock(BKL);
-
 			if (err >= 0)
 				err = i915_sw_fence_await_dma_fence(wait,
 								    &rq->fence,
@@ -457,15 +437,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		goto out_threads;
 	}
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 	for (n = 0; n < t.ncontexts; n++) {
+		mutex_lock(&t.engine->i915->drm.struct_mutex);
 		t.contexts[n] = mock_context(t.engine->i915, "mock");
+		mutex_unlock(&t.engine->i915->drm.struct_mutex);
 		if (!t.contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
 		}
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 
 	for (n = 0; n < ncpus; n++) {
 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
@@ -495,18 +475,15 @@ static int mock_breadcrumbs_smoketest(void *arg)
 		atomic_long_read(&t.num_fences),
 		ncpus);
 
-	mutex_lock(&t.engine->i915->drm.struct_mutex);
 out_contexts:
 	for (n = 0; n < t.ncontexts; n++) {
 		if (!t.contexts[n])
 			break;
 		mock_context_close(t.contexts[n]);
 	}
-	mutex_unlock(&t.engine->i915->drm.struct_mutex);
 	kfree(t.contexts);
 out_threads:
 	kfree(threads);
-
 	return ret;
 }
 
@@ -539,7 +516,6 @@ static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	unsigned int id;
 	int err = -ENODEV;
@@ -549,28 +525,25 @@ static int live_nop_request(void *arg)
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	for_each_engine(engine, i915, id) {
-		struct i915_request *request = NULL;
 		unsigned long n, prime;
 		IGT_TIMEOUT(end_time);
 		ktime_t times[2] = {};
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		for_each_prime_number_from(prime, 1, 8192) {
+			struct i915_request *request = NULL;
+
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = i915_request_create(engine->kernel_context);
-				if (IS_ERR(request)) {
-					err = PTR_ERR(request);
-					goto out_unlock;
-				}
+				if (IS_ERR(request))
+					return PTR_ERR(request);
 
 				/* This space is left intentionally blank.
 				 *
@@ -585,9 +558,11 @@ static int live_nop_request(void *arg)
 				 * for latency.
 				 */
 
+				i915_request_get(request);
 				i915_request_add(request);
 			}
 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
+			i915_request_put(request);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 1)
@@ -599,7 +574,7 @@ static int live_nop_request(void *arg)
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			return err;
 
 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -607,9 +582,6 @@ static int live_nop_request(void *arg)
 			prime, div64_u64(ktime_to_ns(times[1]), prime));
 	}
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -679,6 +651,7 @@ empty_request(struct intel_engine_cs *engine,
 	if (err)
 		goto out_request;
 
+	i915_request_get(request);
 out_request:
 	i915_request_add(request);
 	return err ? ERR_PTR(err) : request;
@@ -688,7 +661,6 @@ static int live_empty_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
 	unsigned int id;
@@ -699,14 +671,9 @@ static int live_empty_request(void *arg)
 	 * the overhead of submitting requests to the hardware.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	batch = empty_batch(i915);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto out_unlock;
-	}
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
 
 	for_each_engine(engine, i915, id) {
 		IGT_TIMEOUT(end_time);
@@ -730,6 +697,7 @@ static int live_empty_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
+				i915_request_put(request);
 				request = empty_request(engine, batch);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
@@ -745,6 +713,7 @@ static int live_empty_request(void *arg)
 			if (__igt_timeout(end_time, NULL))
 				break;
 		}
+		i915_request_put(request);
 
 		err = igt_live_test_end(&t);
 		if (err)
@@ -759,9 +728,6 @@ static int live_empty_request(void *arg)
 out_batch:
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -841,7 +807,6 @@ static int live_all_engines(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	struct i915_request *request[I915_NUM_ENGINES];
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	struct i915_vma *batch;
 	unsigned int id;
@@ -852,18 +817,15 @@ static int live_all_engines(void *arg)
 	 * block doing so, and that they don't complete too soon.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		return err;
 
 	batch = recursive_batch(i915);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
-		goto out_unlock;
+		return err;
 	}
 
 	for_each_engine(engine, i915, id) {
@@ -933,9 +895,6 @@ static int live_all_engines(void *arg)
 			i915_request_put(request[id]);
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -945,7 +904,6 @@ static int live_sequential_engines(void *arg)
 	struct i915_request *request[I915_NUM_ENGINES] = {};
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	unsigned int id;
 	int err;
@@ -956,12 +914,9 @@ static int live_sequential_engines(void *arg)
 	 * they are running on independent engines.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		return err;
 
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *batch;
@@ -971,7 +926,7 @@ static int live_sequential_engines(void *arg)
 			err = PTR_ERR(batch);
 			pr_err("%s: Unable to create batch for %s, err=%d\n",
 			       __func__, engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 
 		request[id] = i915_request_create(engine->kernel_context);
@@ -1063,9 +1018,6 @@ static int live_sequential_engines(void *arg)
 		i915_vma_put(request[id]->batch);
 		i915_request_put(request[id]);
 	}
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1080,16 +1032,12 @@ static int __live_parallel_engine1(void *arg)
 		struct i915_request *rq;
 		int err;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(rq))
 			return PTR_ERR(rq);
-		}
 
 		i915_request_get(rq);
 		i915_request_add(rq);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		err = 0;
 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
@@ -1115,16 +1063,11 @@ static int __live_parallel_engineN(void *arg)
 	do {
 		struct i915_request *rq;
 
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		rq = i915_request_create(engine->kernel_context);
-		if (IS_ERR(rq)) {
-			mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(rq))
 			return PTR_ERR(rq);
-		}
 
 		i915_request_add(rq);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
-
 		count++;
 	} while (!__igt_timeout(end_time, NULL));
 
@@ -1154,9 +1097,7 @@ static int live_parallel_engines(void *arg)
 		struct task_struct *tsk[I915_NUM_ENGINES] = {};
 		struct igt_live_test t;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = igt_live_test_begin(&t, i915, __func__, "");
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 
@@ -1184,10 +1125,8 @@ static int live_parallel_engines(void *arg)
 			put_task_struct(tsk[id]);
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
 	return err;
@@ -1280,9 +1219,10 @@ static int live_breadcrumbs_smoketest(void *arg)
 		goto out_threads;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (n = 0; n < t[0].ncontexts; n++) {
+		mutex_lock(&i915->drm.struct_mutex);
 		t[0].contexts[n] = live_context(i915, file);
+		mutex_unlock(&i915->drm.struct_mutex);
 		if (!t[0].contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
@@ -1299,7 +1239,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 		t[id].max_batch = max_batches(t[0].contexts[0], engine);
 		if (t[id].max_batch < 0) {
 			ret = t[id].max_batch;
-			mutex_unlock(&i915->drm.struct_mutex);
 			goto out_flush;
 		}
 		/* One ring interleaved between requests from all cpus */
@@ -1314,7 +1253,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 					  &t[id], "igt/%d.%d", id, n);
 			if (IS_ERR(tsk)) {
 				ret = PTR_ERR(tsk);
-				mutex_unlock(&i915->drm.struct_mutex);
 				goto out_flush;
 			}
 
@@ -1322,7 +1260,6 @@ static int live_breadcrumbs_smoketest(void *arg)
 			threads[id * ncpus + n] = tsk;
 		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
 
@@ -1350,10 +1287,8 @@ static int live_breadcrumbs_smoketest(void *arg)
 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ret = igt_live_test_end(&live) ?: ret;
 out_contexts:
-	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(t[0].contexts);
 out_threads:
 	kfree(threads);
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 438ea0eaa416..825a8286cbe8 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data)
 {
 	struct drm_i915_private *i915 = data;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(i915);
 
@@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data)
 {
 	struct intel_gt *gt = data;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-	if (igt_flush_test(gt->i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(gt->i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 0e4f66312b39..1c9db08f7c28 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -833,12 +833,8 @@ int i915_vma_mock_selftests(void)
 
 	err = i915_subtests(tests, ggtt);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	i915_gem_drain_freed_objects(i915);
-
 	mock_fini_ggtt(ggtt);
 	kfree(ggtt);
 out_put:
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index d3b5eb402d33..2a5fbe46ea9f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -12,31 +12,25 @@
 
 #include "igt_flush_test.h"
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
+int igt_flush_test(struct drm_i915_private *i915)
 {
 	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
-	int repeat = !!(flags & I915_WAIT_LOCKED);
 
 	cond_resched();
 
-	do {
-		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-			pr_err("%pS timed out, cancelling all further testing.\n",
-			       __builtin_return_address(0));
+	i915_retire_requests(i915);
+	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+		pr_err("%pS timed out, cancelling all further testing.\n",
+		       __builtin_return_address(0));
 
-			GEM_TRACE("%pS timed out.\n",
-				  __builtin_return_address(0));
-			GEM_TRACE_DUMP();
+		GEM_TRACE("%pS timed out.\n",
+			  __builtin_return_address(0));
+		GEM_TRACE_DUMP();
 
-			intel_gt_set_wedged(&i915->gt);
-			repeat = 0;
-			ret = -EIO;
-		}
-
-		/* Ensure we also flush after wedging. */
-		if (flags & I915_WAIT_LOCKED)
-			i915_retire_requests(i915);
-	} while (repeat--);
+		intel_gt_set_wedged(&i915->gt);
+		ret = -EIO;
+	}
+	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
index 63e009927c43..7541fa74e641 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
@@ -9,6 +9,6 @@
 
 struct drm_i915_private;
 
-int igt_flush_test(struct drm_i915_private *i915, unsigned int flags);
+int igt_flush_test(struct drm_i915_private *i915);
 
 #endif /* IGT_FLUSH_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 3e902761cd16..04a6f88fdf64 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -19,15 +19,12 @@ int igt_live_test_begin(struct igt_live_test *t,
 	enum intel_engine_id id;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	t->i915 = i915;
 	t->func = func;
 	t->name = name;
 
 	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
+				     I915_WAIT_INTERRUPTIBLE,
 				     MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
@@ -50,9 +47,7 @@ int igt_live_test_end(struct igt_live_test *t)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(i915))
 		return -EIO;
 
 	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 2448067822af..622bb2127453 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -41,8 +41,6 @@ void mock_device_flush(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
@@ -55,9 +53,7 @@ static void mock_device_release(struct drm_device *dev)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
-- 
2.23.0
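
For orientation, the igt_flush_test() calling convention before and after
this change, condensed from the hunks above (all names come from the
diffs; the snippet itself is illustrative, not part of the patch):

	/* before: callers held struct_mutex and passed wait flags */
	mutex_lock(&i915->drm.struct_mutex);
	err = igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	/* after: lockless; retires, waits up to HZ/5 for idle, and wedges
	 * the GPU on timeout before retiring once more
	 */
	err = igt_flush_test(i915);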


* [PATCH 11/30] drm/i915: Remove the GEM idle worker
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (8 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
                   ` (24 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Nothing inside the idle worker now requires struct_mutex, so we can
remove the indirection of using our own worker.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        | 28 ++-----------------
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  3 --
 drivers/gpu/drm/i915/i915_debugfs.c           |  5 ----
 drivers/gpu/drm/i915/i915_drv.h               |  9 ------
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  6 ----
 5 files changed, 2 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 2ddc3aeaac9d..26f325bbfe4d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -13,36 +13,13 @@
 
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	cancel_delayed_work(&i915->gem.retire_work);
 
 	i915_vma_parked(i915);
 
 	i915_globals_park();
 }
 
-static void idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.idle_work);
-	bool park;
-
-	cancel_delayed_work_sync(&i915->gem.retire_work);
-	mutex_lock(&i915->drm.struct_mutex);
-
-	intel_wakeref_lock(&i915->gt.wakeref);
-	park = (!intel_wakeref_is_active(&i915->gt.wakeref) &&
-		!work_pending(work));
-	intel_wakeref_unlock(&i915->gt.wakeref);
-	if (park)
-		i915_gem_park(i915);
-	else
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static void retire_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
@@ -71,7 +48,7 @@ static int pm_notifier(struct notifier_block *nb,
 		break;
 
 	case INTEL_GT_PARK:
-		queue_work(i915->wq, &i915->gem.idle_work);
+		i915_gem_park(i915);
 		break;
 	}
 
@@ -264,7 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 void i915_gem_init__pm(struct drm_i915_private *i915)
 {
-	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
 	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
 
 	i915->gem.pm_notifier.notifier_call = pm_notifier;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 856b8e467ee8..4ba6ed5c8313 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -572,11 +572,8 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
-
 	intel_gt_pm_get(&i915->gt);
-
 	cancel_delayed_work_sync(&i915->gem.retire_work);
-	flush_work(&i915->gem.idle_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 385289895107..7c4bba21adcd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3662,11 +3662,6 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_gem_shrink_all(i915);
 	fs_reclaim_release(GFP_KERNEL);
 
-	if (val & DROP_IDLE) {
-		flush_delayed_work(&i915->gem.retire_work);
-		flush_work(&i915->gem.idle_work);
-	}
-
 	if (val & DROP_FREED)
 		i915_gem_drain_freed_objects(i915);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 337d8306416a..ad31852e4309 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1719,15 +1719,6 @@ struct drm_i915_private {
 		 * fires, go retire requests.
 		 */
 		struct delayed_work retire_work;
-
-		/**
-		 * When we detect an idle GPU, we want to turn on
-		 * powersaving features. So once we see that there
-		 * are no more requests outstanding and no more
-		 * arrive within a small period of time, we fire
-		 * off the idle_work.
-		 */
-		struct work_struct idle_work;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 622bb2127453..a8be5da2b3cf 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -55,7 +55,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	mock_device_flush(i915);
 
-	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -103,10 +102,6 @@ static void mock_retire_work_handler(struct work_struct *work)
 {
 }
 
-static void mock_idle_work_handler(struct work_struct *work)
-{
-}
-
 static int pm_domain_resume(struct device *dev)
 {
 	return pm_generic_runtime_resume(dev);
@@ -187,7 +182,6 @@ struct drm_i915_private *mock_gem_device(void)
 	mock_init_contexts(i915);
 
 	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
-	INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler);
 
 	intel_timelines_init(i915);
 
-- 
2.23.0
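
For orientation, the park/unpark flow once the idle worker is gone,
condensed from the diff (the container_of() and NOTIFY_OK details are
assumed from context rather than shown in the hunks):

	static int pm_notifier(struct notifier_block *nb,
			       unsigned long action, void *data)
	{
		struct drm_i915_private *i915 =
			container_of(nb, typeof(*i915), gem.pm_notifier);

		switch (action) {
		case INTEL_GT_UNPARK:
			i915_globals_unpark();
			/* the retire timer is still re-armed on unpark */
			queue_delayed_work(i915->wq, &i915->gem.retire_work,
					   round_jiffies_up_relative(HZ));
			break;

		case INTEL_GT_PARK:
			/* park synchronously: i915_gem_park() now cancels
			 * gem.retire_work itself, no bounce via idle_work
			 */
			i915_gem_park(i915);
			break;
		}

		return NOTIFY_OK;
	}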


* [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (9 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 11/30] drm/i915: Remove the GEM idle worker Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
                   ` (23 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

wait_for_timelines is essentially the same loop as retiring requests
(with an extra timeout), so merge the two into one routine.

v2: i915_retire_requests_timeout and keep VT'd w/a as !interruptible

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  4 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  3 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  4 +-
 drivers/gpu/drm/i915/i915_drv.h               |  3 +-
 drivers/gpu/drm/i915/i915_gem.c               | 67 ++-----------------
 drivers/gpu/drm/i915/i915_gem_evict.c         | 12 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  4 +-
 drivers/gpu/drm/i915/i915_request.c           | 26 ++++++-
 drivers/gpu/drm/i915/i915_request.h           |  7 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  4 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 +-
 14 files changed, 50 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index c19431d609fc..418d0d2b5fa9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -432,9 +432,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 
 	/* Attempt to reap some mmap space from dead objects */
 	do {
-		err = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (err)
 			break;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 26f325bbfe4d..90b211257f2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -59,9 +59,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915,
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f5402aad9b5a..f902aeee1755 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1137,7 +1137,7 @@ __sseu_finish(const char *name,
 
 	if ((flags & TEST_IDLE) && ret == 0) {
 		ret = i915_gem_wait_for_idle(ce->engine->i915,
-					     0, MAX_SCHEDULE_TIMEOUT);
+					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d4cefdd38431..bdb34f03ec47 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -196,8 +196,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	if (i915_gem_wait_for_idle(gt->i915, 0,
-				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c4bba21adcd..5888a658e2b7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3635,9 +3635,7 @@ i915_drop_caches_set(void *data, u64 val)
 		i915_retire_requests(i915);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
+		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ad31852e4309..44f3463ff9f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2321,8 +2321,7 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   unsigned int flags, long timeout);
+int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4c553d9aa07..7c82fc39f655 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -883,61 +883,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-static long
-wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
-{
-	struct intel_gt_timelines *timelines = &gt->timelines;
-	struct intel_timeline *tl;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry(tl, &timelines->active_list, link) {
-		struct dma_fence *fence;
-
-		fence = i915_active_fence_get(&tl->last_request);
-		if (!fence)
-			continue;
-
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (!dma_fence_is_i915(fence)) {
-			timeout = dma_fence_wait_timeout(fence,
-							 flags & I915_WAIT_INTERRUPTIBLE,
-							 timeout);
-		} else {
-			struct i915_request *rq = to_request(fence);
-
-			/*
-			 * "Race-to-idle".
-			 *
-			 * Switching to the kernel context is often used as
-			 * a synchronous step prior to idling, e.g. in suspend
-			 * for flushing all current operations to memory before
-			 * sleeping. These we want to complete as quickly as
-			 * possible to avoid prolonged stalls, so allow the gpu
-			 * to boost to maximum clocks.
-			 */
-			if (flags & I915_WAIT_FOR_IDLE_BOOST)
-				gen6_rps_boost(rq);
-
-			timeout = i915_request_wait(rq, flags, timeout);
-		}
-
-		dma_fence_put(fence);
-		if (timeout < 0)
-			return timeout;
-
-		/* restart after reacquiring the lock */
-		spin_lock_irqsave(&timelines->lock, flags);
-		tl = list_entry(&timelines->active_list, typeof(*tl), link);
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	return timeout;
-}
-
-int i915_gem_wait_for_idle(struct drm_i915_private *i915,
-			   unsigned int flags, long timeout)
+int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt *gt = &i915->gt;
 
@@ -945,18 +891,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	if (!intel_gt_pm_is_awake(gt))
 		return 0;
 
-	do {
-		timeout = wait_for_timelines(gt, flags, timeout);
-		if (timeout < 0)
-			return timeout;
-
+	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
 		cond_resched();
 		if (signal_pending(current))
 			return -EINTR;
+	}
 
-	} while (i915_retire_requests(i915));
-
-	return 0;
+	return timeout;
 }
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0552bf93eea3..0a412f6d01d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -46,9 +46,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT);
+	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -126,6 +124,8 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
+	i915_retire_requests(vm->i915);
+
 search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
@@ -264,13 +264,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 
 	trace_i915_gem_evict_node(vm, target, flags);
 
-	/* Retire before we search the active list. Although we have
+	/*
+	 * Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	if (!(flags & PIN_NONBLOCK))
-		i915_retire_requests(vm->i915);
+	i915_retire_requests(vm->i915);
 
 	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7462d87f7a48..082fcf9085a6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2528,7 +2528,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
+		/* XXX This does not prevent more requests being submitted! */
+		if (i915_retire_requests_timeout(dev_priv,
+						 -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4ffe62a42186..52f7c4e5b644 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1508,13 +1508,19 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915)
+long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
 {
 	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
 	unsigned long flags;
+	bool interruptible;
 	LIST_HEAD(free);
 
+	interruptible = true;
+	if (timeout < 0)
+		timeout = -timeout, interruptible = false;
+
 	spin_lock_irqsave(&timelines->lock, flags);
 	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
 		if (!mutex_trylock(&tl->mutex))
@@ -1525,13 +1531,27 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 		tl->active_count++; /* pin the list element */
 		spin_unlock_irqrestore(&timelines->lock, flags);
 
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 interruptible,
+								 timeout);
+				dma_fence_put(fence);
+			}
+		}
+
 		retire_requests(tl);
 
 		spin_lock_irqsave(&timelines->lock, flags);
 
 		/* Resume iteration after dropping lock */
 		list_safe_reset_next(tl, tn, link);
-		if (!--tl->active_count)
+		if (--tl->active_count)
+			active_count += !!rcu_access_pointer(tl->last_request.fence);
+		else
 			list_del(&tl->link);
 
 		mutex_unlock(&tl->mutex);
@@ -1547,7 +1567,7 @@ bool i915_retire_requests(struct drm_i915_private *i915)
 	list_for_each_entry_safe(tl, tn, &free, link)
 		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&timelines->active_list);
+	return active_count ? timeout : 0;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 621fb33cda30..256b0715180f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -310,7 +310,6 @@ long i915_request_wait(struct i915_request *rq,
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_PRIORITY	BIT(1) /* small priority bump for the request */
 #define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
-#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline bool i915_request_signaled(const struct i915_request *rq)
 {
@@ -460,6 +459,10 @@ i915_request_active_timeline(struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915);
+long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
+static inline void i915_retire_requests(struct drm_i915_private *i915)
+{
+	i915_retire_requests_timeout(i915, 0);
+}
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 2a5fbe46ea9f..ed496bd6d84f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -18,8 +18,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
 	cond_resched();
 
-	i915_retire_requests(i915);
-	if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) {
+	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -30,7 +29,6 @@ int igt_flush_test(struct drm_i915_private *i915)
 		intel_gt_set_wedged(&i915->gt);
 		ret = -EIO;
 	}
-	i915_retire_requests(i915);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index 04a6f88fdf64..eae90f97df6c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -23,9 +23,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE,
-				     MAX_SCHEDULE_TIMEOUT);
+	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index a8be5da2b3cf..3b589bbb2c2d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests(i915));
+	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
-- 
2.23.0
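
The merged helper folds the wait into the retire walk and encodes
interruptibility in the sign of the timeout. A sketch of the resulting
call patterns (values taken from the hunks above; illustrative only):

	/* retire whatever has already completed, without waiting */
	i915_retire_requests(i915); /* == i915_retire_requests_timeout(i915, 0) */

	/* interruptible wait, up to 200ms, for in-flight requests */
	timeout = i915_retire_requests_timeout(i915, HZ / 5);

	/* uninterruptible wait via a negative timeout, as in the VT-d w/a */
	timeout = i915_retire_requests_timeout(i915, -MAX_SCHEDULE_TIMEOUT);

The return value is 0 once no timelines remain active, otherwise the
timeout left over after waiting.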


* [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (10 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:53   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
                   ` (22 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Now that we can retire without taking struct_mutex, we can do so to
handle shrinking the mmap-offset space after an allocation failure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 418d0d2b5fa9..45bbd22c14f1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -431,19 +431,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 		return 0;
 
 	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
+	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		return err;
 
-		i915_gem_drain_freed_objects(i915);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
-
-	} while (flush_delayed_work(&i915->gem.retire_work));
-
-	return err;
+	i915_gem_drain_freed_objects(i915);
+	return drm_gem_create_mmap_offset(&obj->base);
 }
 
 int
-- 
2.23.0
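
At a glance, the wait/flush/retry loop against the background retire
worker collapses into a single synchronous retire pass (condensed from
the diff above; no new code here):

	/* before: repeat until the retire worker freed up mmap space */
	do {
		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
		...
	} while (flush_delayed_work(&i915->gem.retire_work));

	/* after: retire inline, reap the dead objects, then try once */
	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	i915_gem_drain_freed_objects(i915);
	return drm_gem_create_mmap_offset(&obj->base);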


* [PATCH 14/30] drm/i915: Move request runtime management onto gt
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (11 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 15:58   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT Chris Wilson
                   ` (21 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Requests are run from the gt and are tied into the gt runtime power
management, so pull the runtime request management under gt/.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  28 +---
 .../drm/i915/gem/selftests/i915_gem_context.c |   5 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   5 +-
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 123 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  24 ++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |  11 ++
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |   8 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  17 +--
 drivers/gpu/drm/i915/i915_drv.h               |  10 --
 drivers/gpu/drm/i915/i915_gem.c               |  17 ---
 drivers/gpu/drm/i915/i915_gem_evict.c         |  14 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
 drivers/gpu/drm/i915/i915_request.c           |  64 +--------
 drivers/gpu/drm/i915/i915_request.h           |   7 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |   9 +-
 .../gpu/drm/i915/selftests/igt_live_test.c    |   5 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  10 +-
 21 files changed, 213 insertions(+), 158 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d2b53b5add81..06e1876d0250 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -83,6 +83,7 @@ gt-y += \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
+	gt/intel_gt_requests.o \
 	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_rc6.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 45bbd22c14f1..fd4122d8c0a9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -8,6 +8,7 @@
 #include <linux/sizes.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_gem_gtt.h"
@@ -424,6 +425,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
 static int create_mmap_offset(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_gt *gt = &i915->gt;
 	int err;
 
 	err = drm_gem_create_mmap_offset(&obj->base);
@@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
 		return 0;
 
 	/* Attempt to reap some mmap space from dead objects */
-	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 90b211257f2d..9194d8464bf7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -7,31 +7,18 @@
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
 
 static void i915_gem_park(struct drm_i915_private *i915)
 {
-	cancel_delayed_work(&i915->gem.retire_work);
-
 	i915_vma_parked(i915);
 
 	i915_globals_park();
 }
 
-static void retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.retire_work.work);
-
-	i915_retire_requests(i915);
-
-	queue_delayed_work(i915->wq,
-			   &i915->gem.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 static int pm_notifier(struct notifier_block *nb,
 		       unsigned long action,
 		       void *data)
@@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb,
 	switch (action) {
 	case INTEL_GT_UNPARK:
 		i915_globals_unpark();
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
 		break;
 
 	case INTEL_GT_PARK:
@@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
 {
 	bool result = !intel_gt_is_wedged(gt);
 
-	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/* XXX hide warning from gem_eio */
 		if (i915_modparams.reset) {
 			dev_err(gt->i915->drm.dev,
@@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(&i915->gt);
+	intel_gt_suspend(&i915->gt);
+	intel_uc_suspend(&i915->gt.uc);
 
 	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 
 	i915_gem_drain_freed_objects(i915);
-
-	intel_uc_suspend(&i915->gt.uc);
-	intel_gt_suspend(&i915->gt);
 }
 
 static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
@@ -239,8 +221,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 void i915_gem_init__pm(struct drm_i915_private *i915)
 {
-	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
-
 	i915->gem.pm_notifier.notifier_call = pm_notifier;
 	blocking_notifier_chain_register(&i915->gt.pm_notifications,
 					 &i915->gem.pm_notifier);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index f902aeee1755..2288757808ae 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -8,6 +8,7 @@
 
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
 
@@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm,
 	int err;
 
 	/* Keep in GEM's good graces */
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
@@ -1136,7 +1137,7 @@ __sseu_finish(const char *name,
 		igt_spinner_end(spin);
 
 	if ((flags & TEST_IDLE) && ret == 0) {
-		ret = i915_gem_wait_for_idle(ce->engine->i915,
+		ret = intel_gt_wait_for_idle(ce->engine->gt,
 					     MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 4ba6ed5c8313..1cd25cfd0246 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -573,7 +573,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_driver_unregister__shrinker(i915);
 	intel_gt_pm_get(&i915->gt);
-	cancel_delayed_work_sync(&i915->gem.retire_work);
+	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7205595369be..53220741e49e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -6,6 +6,7 @@
 #include "i915_drv.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_mocs.h"
 #include "intel_rc6.h"
 #include "intel_uncore.h"
@@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
 	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
+	intel_gt_init_requests(gt);
 	intel_gt_pm_init_early(gt);
 	intel_uc_init_early(&gt->uc);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index bdb34f03ec47..d2e80ba64d69 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -10,6 +10,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_pm.h"
 #include "intel_rc6.h"
 #include "intel_wakeref.h"
@@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
 	i915_pmu_gt_unparked(i915);
 
 	intel_gt_queue_hangcheck(gt);
+	intel_gt_unpark_requests(gt);
 
 	pm_notify(gt, INTEL_GT_UNPARK);
 
@@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf)
 	GEM_TRACE("\n");
 
 	pm_notify(gt, INTEL_GT_PARK);
+	intel_gt_park_requests(gt);
 
 	i915_pmu_gt_parked(i915);
 	if (INTEL_GEN(i915) >= 6)
@@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt)
 
 static void wait_for_idle(struct intel_gt *gt)
 {
-	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
 		/*
 		 * Forcibly cancel outstanding work and leave
 		 * the gpu quiet.
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..8aed89fd2cdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static void retire_requests(struct intel_timeline *tl)
+{
+	struct i915_request *rq, *rn;
+
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
+		if (!i915_request_retire(rq))
+			break;
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+	struct intel_gt_timelines *timelines = &gt->timelines;
+	struct intel_timeline *tl, *tn;
+	unsigned long active_count = 0;
+	unsigned long flags;
+	bool interruptible;
+	LIST_HEAD(free);
+
+	interruptible = true;
+	if (unlikely(timeout < 0))
+		timeout = -timeout, interruptible = false;
+
+	spin_lock_irqsave(&timelines->lock, flags);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
+
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock_irqrestore(&timelines->lock, flags);
+
+		if (timeout > 0) {
+			struct dma_fence *fence;
+
+			fence = i915_active_fence_get(&tl->last_request);
+			if (fence) {
+				timeout = dma_fence_wait_timeout(fence,
+								 true,
+								 interruptible,
+				dma_fence_put(fence);
+			}
+		}
+
+		retire_requests(tl);
+
+		spin_lock_irqsave(&timelines->lock, flags);
+
+		/* Resume iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (--tl->active_count)
+			active_count += !!rcu_access_pointer(tl->last_request.fence);
+		else
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
+	}
+	spin_unlock_irqrestore(&timelines->lock, flags);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
+
+	return active_count ? timeout : 0;
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+	/* If the device is asleep, we have no requests outstanding */
+	if (!intel_gt_pm_is_awake(gt))
+		return 0;
+
+	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+		cond_resched();
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct intel_gt *gt =
+		container_of(work, typeof(*gt), requests.retire_work.work);
+
+	intel_gt_retire_requests(gt);
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+	INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+	cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+	schedule_delayed_work(&gt->requests.retire_work,
+			      round_jiffies_up_relative(HZ));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..bd31cbce47e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_gt;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+	intel_gt_retire_requests_timeout(gt, 0);
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 7134f1319bbe..802f516a3430 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -50,6 +50,17 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
+	struct intel_gt_requests {
+		/**
+		 * We leave the user IRQ off as much as possible,
+		 * but this means that requests will finish and never
+		 * be retired once the system goes idle. Set a timer to
+		 * fire periodically while the ring is running. When it
+		 * fires, go retire requests.
+		 */
+		struct delayed_work retire_work;
+	} requests;
+
 	struct intel_wakeref wakeref;
 	atomic_t user_wakeref;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 16abfabf08c7..d6df40cdc8a6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -8,6 +8,7 @@
 
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
+#include "intel_gt_requests.h"
 
 #include "../selftests/i915_random.h"
 #include "../i915_selftest.h"
@@ -641,6 +642,7 @@ static int live_hwsp_alternate(void *arg)
 static int live_hwsp_wrap(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = &i915->gt;
 	struct intel_engine_cs *engine;
 	struct intel_timeline *tl;
 	enum intel_engine_id id;
@@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg)
 	 * foreign GPU references.
 	 */
 
-	tl = intel_timeline_create(&i915->gt, NULL);
+	tl = intel_timeline_create(gt, NULL);
 	if (IS_ERR(tl))
 		return PTR_ERR(tl);
 
@@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg)
 	if (err)
 		goto out_free;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt->i915, id) {
 		const u32 *hwsp_seqno[2];
 		struct i915_request *rq;
 		u32 seqno[2];
@@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg)
 			goto out;
 		}
 
-		i915_retire_requests(i915); /* recycle HWSP */
+		intel_gt_retire_requests(gt); /* recycle HWSP */
 	}
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5888a658e2b7..2afc41e43b6e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -41,6 +41,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/intel_reset.h"
 #include "gt/intel_rc6.h"
 #include "gt/uc/intel_guc_submission.h"
@@ -3621,33 +3622,33 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
+	struct intel_gt *gt = &i915->gt;
 	int ret;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
 
 	if (val & DROP_RESET_ACTIVE &&
-	    wait_for(intel_engines_are_idle(&i915->gt),
-		     I915_IDLE_ENGINES_TIMEOUT))
-		intel_gt_set_wedged(&i915->gt);
+	    wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT))
+		intel_gt_set_wedged(gt);
 
 	if (val & DROP_RETIRE)
-		i915_retire_requests(i915);
+		intel_gt_retire_requests(gt);
 
 	if (val & (DROP_IDLE | DROP_ACTIVE)) {
-		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+		ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 	}
 
 	if (val & DROP_IDLE) {
-		ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		ret = intel_gt_pm_wait_for_idle(gt);
 		if (ret)
 			return ret;
 	}
 
-	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
-		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
+	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
+		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
 
 	fs_reclaim_acquire(GFP_KERNEL);
 	if (val & DROP_BOUND)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44f3463ff9f1..cb63b2bd0ce8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1710,15 +1710,6 @@ struct drm_i915_private {
 
 	struct {
 		struct notifier_block pm_notifier;
-
-		/**
-		 * We leave the user IRQ off as much as possible,
-		 * but this means that requests will finish and never
-		 * be retired once the system goes idle. Set a timer to
-		 * fire periodically while the ring is running. When it
-		 * fires, go retire requests.
-		 */
-		struct delayed_work retire_work;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
@@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7c82fc39f655..5a664bdead8c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	}
 }
 
-int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
-{
-	struct intel_gt *gt = &i915->gt;
-
-	/* If the device is asleep, we have no requests outstanding */
-	if (!intel_gt_pm_is_awake(gt))
-		return 0;
-
-	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
-		cond_resched();
-		if (signal_pending(current))
-			return -EINTR;
-	}
-
-	return timeout;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0a412f6d01d7..7e62c310290f 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -29,6 +29,7 @@
 #include <drm/i915_drm.h>
 
 #include "gem/i915_gem_context.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_trace.h"
@@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
-static int ggtt_flush(struct drm_i915_private *i915)
+static int ggtt_flush(struct intel_gt *gt)
 {
 	/*
 	 * Not everything in the GGTT is tracked via vma (otherwise we
@@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
 			 u64 start, u64 end,
 			 unsigned flags)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct drm_mm_scan scan;
 	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
@@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 				    min_size, alignment, color,
 				    start, end, mode);
 
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 search_again:
 	active = NULL;
@@ -197,7 +197,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
 		return -EBUSY;
 
-	ret = ggtt_flush(dev_priv);
+	ret = ggtt_flush(vm->gt);
 	if (ret)
 		return ret;
 
@@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	 * a stray pin (preventing eviction) that can only be resolved by
 	 * retiring.
 	 */
-	i915_retire_requests(vm->i915);
+	intel_gt_retire_requests(vm->gt);
 
 	if (i915_vm_has_cache_coloring(vm)) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
@@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	 * switch otherwise is ineffective.
 	 */
 	if (i915_is_ggtt(vm)) {
-		ret = ggtt_flush(vm->i915);
+		ret = ggtt_flush(vm->gt);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 082fcf9085a6..1d26634ca597 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -38,6 +38,7 @@
 
 #include "display/intel_frontbuffer.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 
 	if (unlikely(ggtt->do_idle_maps)) {
 		/* XXX This does not prevent more requests being submitted! */
-		if (i915_retire_requests_timeout(dev_priv,
-						 -MAX_SCHEDULE_TIMEOUT)) {
+		if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
+						     -MAX_SCHEDULE_TIMEOUT)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 52f7c4e5b644..437f9fc6282e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq)
 	spin_unlock(&locked->active.lock);
 }
 
-static bool i915_request_retire(struct i915_request *rq)
+bool i915_request_retire(struct i915_request *rq)
 {
 	if (!i915_request_completed(rq))
 		return false;
@@ -1508,68 +1508,6 @@ long i915_request_wait(struct i915_request *rq,
 	return timeout;
 }
 
-long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl, *tn;
-	unsigned long active_count = 0;
-	unsigned long flags;
-	bool interruptible;
-	LIST_HEAD(free);
-
-	interruptible = true;
-	if (timeout < 0)
-		timeout = -timeout, interruptible = false;
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
-		if (!mutex_trylock(&tl->mutex))
-			continue;
-
-		intel_timeline_get(tl);
-		GEM_BUG_ON(!tl->active_count);
-		tl->active_count++; /* pin the list element */
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		if (timeout > 0) {
-			struct dma_fence *fence;
-
-			fence = i915_active_fence_get(&tl->last_request);
-			if (fence) {
-				timeout = dma_fence_wait_timeout(fence,
-								 interruptible,
-								 timeout);
-				dma_fence_put(fence);
-			}
-		}
-
-		retire_requests(tl);
-
-		spin_lock_irqsave(&timelines->lock, flags);
-
-		/* Resume iteration after dropping lock */
-		list_safe_reset_next(tl, tn, link);
-		if (--tl->active_count)
-			active_count += !!rcu_access_pointer(tl->last_request.fence);
-		else
-			list_del(&tl->link);
-
-		mutex_unlock(&tl->mutex);
-
-		/* Defer the final release to after the spinlock */
-		if (refcount_dec_and_test(&tl->kref.refcount)) {
-			GEM_BUG_ON(tl->active_count);
-			list_add(&tl->link, &free);
-		}
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	list_for_each_entry_safe(tl, tn, &free, link)
-		__intel_timeline_free(&tl->kref);
-
-	return active_count ? timeout : 0;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_request.c"
 #include "selftests/i915_request.c"
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 256b0715180f..6a95242b280d 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request);
 void __i915_request_queue(struct i915_request *rq,
 			  const struct i915_sched_attr *attr);
 
+bool i915_request_retire(struct i915_request *rq);
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
@@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq)
 					 lockdep_is_held(&rq->engine->active.lock));
 }
 
-long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
-static inline void i915_retire_requests(struct drm_i915_private *i915)
-{
-	i915_retire_requests_timeout(i915, 0);
-}
-
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index ed496bd6d84f..7b0939e3f007 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -4,8 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "gem/i915_gem_context.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
@@ -14,11 +14,12 @@
 
 int igt_flush_test(struct drm_i915_private *i915)
 {
-	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
+	struct intel_gt *gt = &i915->gt;
+	int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
 
 	cond_resched();
 
-	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
@@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 			  __builtin_return_address(0));
 		GEM_TRACE_DUMP();
 
-		intel_gt_set_wedged(&i915->gt);
+		intel_gt_set_wedged(gt);
 		ret = -EIO;
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
index eae90f97df6c..810b60100c2c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -4,7 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_drv.h"
+#include "i915_drv.h"
+#include "gt/intel_gt_requests.h"
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
@@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t,
 	t->func = func;
 	t->name = name;
 
-	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
+	err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT);
 	if (err) {
 		pr_err("%s(%s): failed to idle before, with err=%d!",
 		       func, name, err);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 3b589bbb2c2d..4e6cde0d4859 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -26,6 +26,7 @@
 #include <linux/pm_runtime.h>
 
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
 #include "gt/mock_engine.h"
 
 #include "mock_request.h"
@@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915)
 	do {
 		for_each_engine(engine, i915, id)
 			mock_engine_flush(engine);
-	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
+	} while (intel_gt_retire_requests_timeout(&i915->gt,
+						  MAX_SCHEDULE_TIMEOUT));
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -98,10 +100,6 @@ static void release_dev(struct device *dev)
 	kfree(pdev);
 }
 
-static void mock_retire_work_handler(struct work_struct *work)
-{
-}
-
 static int pm_domain_resume(struct device *dev)
 {
 	return pm_generic_runtime_resume(dev);
@@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_init_contexts(i915);
 
-	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
-
 	intel_timelines_init(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
-- 
2.23.0


* [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (12 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 16/30] drm/i915: Expose engine properties via sysfs Chris Wilson
                   ` (20 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

As our global unpark/park keep track of the number of active users, we
can simply move the accounting from the GEM layer to the base GT layer.
It was originally placed inside GEM to benefit from the extra 100ms
delay on idleness, but that has been eliminated and now there is no
substantive difference between the layers. In moving it, we move another
piece of the puzzle out from underneath struct_mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c | 11 +----------
 drivers/gpu/drm/i915/gt/intel_gt_pm.c  |  5 +++++
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 9194d8464bf7..7c316d4633db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -10,14 +10,6 @@
 #include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
-#include "i915_globals.h"
-
-static void i915_gem_park(struct drm_i915_private *i915)
-{
-	i915_vma_parked(i915);
-
-	i915_globals_park();
-}
 
 static int pm_notifier(struct notifier_block *nb,
 		       unsigned long action,
@@ -28,11 +20,10 @@ static int pm_notifier(struct notifier_block *nb,
 
 	switch (action) {
 	case INTEL_GT_UNPARK:
-		i915_globals_unpark();
 		break;
 
 	case INTEL_GT_PARK:
-		i915_gem_park(i915);
+		i915_vma_parked(i915);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index d2e80ba64d69..b52e2ba3d092 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_globals.h"
 #include "i915_params.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -27,6 +28,8 @@ static int __gt_unpark(struct intel_wakeref *wf)
 
 	GEM_TRACE("\n");
 
+	i915_globals_unpark();
+
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
 	 * when there are no connected displays (no active power domains) during
@@ -78,6 +81,8 @@ static int __gt_park(struct intel_wakeref *wf)
 	GEM_BUG_ON(!wakeref);
 	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 
+	i915_globals_park();
+
 	return 0;
 }
 
-- 
2.23.0


* [PATCH 16/30] drm/i915: Expose engine properties via sysfs
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (13 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 17/30] drm/i915/execlists: Force preemption Chris Wilson
                   ` (19 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Preliminary stub to add engines underneath /sys/class/drm/cardN/, so
that we can expose properties on each engine to the sysadmin.

To start with we have basic analogues of the i915_query ioctl so that we
can pretty print engine discovery from the shell, and flesh out the
directory structure. Later we will add writeable sysadmin properties such
as per-engine timeout controls.

An example tree of the engine properties on Braswell:
    /sys/class/drm/card0
    └── engine
        ├── bcs0
        │   ├── class
        │   ├── heartbeat_interval_ms
        │   ├── instance
        │   ├── mmio_base
        │   └── name
        ├── rcs0
        │   ├── class
        │   ├── heartbeat_interval_ms
        │   ├── instance
        │   ├── mmio_base
        │   └── name
        ├── vcs0
        │   ├── class
        │   ├── heartbeat_interval_ms
        │   ├── instance
        │   ├── mmio_base
        │   └── name
        └── vecs0
            ├── class
            ├── heartbeat_interval_ms
            ├── instance
            ├── mmio_base
            └── name
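
For illustration, a minimal userspace sketch (not part of this patch)
that reads the read-only properties back; the card0/rcs0 path is an
assumption, a real consumer would enumerate /sys/class/drm/card*/engine/*:

    #include <stdio.h>

    int main(void)
    {
            const char *base = "/sys/class/drm/card0/engine/rcs0";
            const char *props[] = { "name", "class", "instance", "mmio_base" };
            char path[256], buf[64];
            unsigned int i;

            for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
                    FILE *f;

                    snprintf(path, sizeof(path), "%s/%s", base, props[i]);
                    f = fopen(path, "r");
                    if (!f)
                            continue;

                    /* each attribute is a single '\n'-terminated value */
                    if (fgets(buf, sizeof(buf), f))
                            printf("%s: %s", props[i], buf);
                    fclose(f);
            }

            return 0;
    }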

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/Makefile                |   3 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 119 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h |  14 +++
 drivers/gpu/drm/i915/i915_sysfs.c            |   3 +
 4 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_sysfs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 06e1876d0250..1f87c70a2842 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,8 +76,9 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
-	gt/intel_engine_pool.o \
 	gt/intel_engine_pm.o \
+	gt/intel_engine_pool.o \
+	gt/intel_engine_sysfs.o \
 	gt/intel_engine_user.o \
 	gt/intel_gt.o \
 	gt/intel_gt_irq.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
new file mode 100644
index 000000000000..cbe9ec59beeb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -0,0 +1,119 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_sysfs.h"
+
+struct kobj_engine {
+	struct kobject base;
+	struct intel_engine_cs *engine;
+};
+
+static struct intel_engine_cs *kobj_to_engine(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_engine, base)->engine;
+}
+
+static ssize_t
+name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", kobj_to_engine(kobj)->name);
+}
+
+static ssize_t
+class_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_class);
+}
+
+static ssize_t
+inst_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", kobj_to_engine(kobj)->uabi_instance);
+}
+
+static ssize_t
+mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
+}
+
+static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
+static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL);
+static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL);
+static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL);
+
+static void kobj_engine_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static struct kobj_type kobj_engine_type = {
+	.release = kobj_engine_release,
+	.sysfs_ops = &kobj_sysfs_ops
+};
+
+static struct kobject *
+kobj_engine(struct kobject *dir, struct intel_engine_cs *engine)
+{
+	struct kobj_engine *ke;
+
+	ke = kzalloc(sizeof(*ke), GFP_KERNEL);
+	if (!ke)
+		return NULL;
+
+	kobject_init(&ke->base, &kobj_engine_type);
+	ke->engine = engine;
+
+	if (kobject_add(&ke->base, dir, "%s", engine->name)) {
+		kobject_put(&ke->base);
+		return NULL;
+	}
+
+	/* xfer ownership to sysfs tree */
+	return &ke->base;
+}
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915)
+{
+	static const struct attribute *files[] = {
+		&name_attr.attr,
+		&class_attr.attr,
+		&inst_attr.attr,
+		&mmio_attr.attr,
+		NULL
+	};
+
+	struct device *kdev = i915->drm.primary->kdev;
+	struct intel_engine_cs *engine;
+	struct kobject *dir;
+
+	dir = kobject_create_and_add("engine", &kdev->kobj);
+	if (!dir)
+		return;
+
+	for_each_uabi_engine(engine, i915) {
+		struct kobject *kobj;
+
+		kobj = kobj_engine(dir, engine);
+		if (!kobj)
+			goto err_engine;
+
+		if (sysfs_create_files(kobj, files))
+			goto err_engine;
+
+		if (0) {
+err_engine:
+			dev_err(kdev, "Failed to add sysfs engine '%s'\n",
+				engine->name);
+			break;
+		}
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.h b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.h
new file mode 100644
index 000000000000..ef44a745b70a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_SYSFS_H
+#define INTEL_ENGINE_SYSFS_H
+
+struct drm_i915_private;
+
+void intel_engines_add_sysfs(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_SYSFS_H */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 034b8abc5062..da0cfcc65802 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -30,6 +30,7 @@
 #include <linux/stat.h>
 #include <linux/sysfs.h>
 
+#include "gt/intel_engine_sysfs.h"
 #include "gt/intel_rc6.h"
 
 #include "i915_drv.h"
@@ -620,6 +621,8 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
 	i915_setup_error_capture(kdev);
+
+	intel_engines_add_sysfs(dev_priv);
 }
 
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
-- 
2.23.0


* [PATCH 17/30] drm/i915/execlists: Force preemption
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (14 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 16/30] drm/i915: Expose engine properties via sysfs Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 18/30] drm/i915: Mark up "sentinel" requests Chris Wilson
                   ` (18 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

If the preempted context takes too long to relinquish control, e.g. it
is stuck inside a shader with arbitration disabled, evict that context
with an engine reset. This ensures that preemptions are reasonably
responsive, providing a tighter QoS for the more important context at
the cost of flagging unresponsive contexts more frequently (i.e. instead
of using an ~10s hangcheck, we now evict at ~100ms). The challenge
lies in picking a timeout that can be reasonably serviced by HW for
typical workloads, balancing the existing clients against the needs for
responsiveness.

Note that coupled with timeslicing, this will lead to rapid GPU "hang"
detection with multiple active contexts vying for GPU time.

The preempt timeout can be adjusted per-engine using,

	/sys/class/drm/card0/engine/*/preempt_timeout_ms
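
As a hedged sketch of the intended tuning flow (the helper name is made
up for illustration; assumes card0 and root privileges):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* e.g. set_preempt_timeout_ms("rcs0", "50"); returns 0 on success */
    static int set_preempt_timeout_ms(const char *engine, const char *ms)
    {
            char path[256];
            int fd, ret;

            snprintf(path, sizeof(path),
                     "/sys/class/drm/card0/engine/%s/preempt_timeout_ms",
                     engine);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;

            /* writing "0" disables the timeout entirely */
            ret = write(fd, ms, strlen(ms)) == (ssize_t)strlen(ms) ? 0 : -1;
            close(fd);
            return ret;
    }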

v2: Couple in sysfs control of preemption timeout

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile         | 15 ++++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 32 +++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 90 ++++++++++++++++++--
 drivers/gpu/drm/i915/i915_params.h           |  2 +-
 6 files changed, 141 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 48df8889a88a..3db4ca5492a9 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,3 +25,18 @@ config DRM_I915_SPIN_REQUEST
 	  May be 0 to disable the initial spin. In practice, we estimate
 	  the cost of enabling the interrupt (if currently disabled) to be
 	  a few microseconds.
+
+config DRM_I915_PREEMPT_TIMEOUT
+	int "Preempt timeout (ms)"
+	default 100 # milliseconds
+	help
+	  How long to wait (in milliseconds) for a preemption event to occur
+	  when submitting a new context via execlists. If the current context
+	  does not hit an arbitration point and yield to HW before the timer
+	  expires, the HW will be reset to allow the more important context
+	  to execute.
+
+	  This is adjustable via
+	  /sys/class/drm/card0/engine/*/preempt_timeout_ms
+
+	  May be 0 to disable the timeout.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 80fd072ac719..dfdffaf35678 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -304,6 +304,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
 
+	engine->props.preempt_timeout = CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+
 	/*
 	 * To be overridden by the backend on setup. However to facilitate
 	 * cleanup on error during setup, we always provide the destroy vfunc.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index cbe9ec59beeb..aac26097c916 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -45,10 +45,37 @@ mmio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 	return sprintf(buf, "0x%x\n", kobj_to_engine(kobj)->mmio_base);
 }
 
+static ssize_t
+preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+		     char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.preempt_timeout);
+}
+
+static ssize_t
+preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long timeout;
+	int err;
+
+	err = kstrtoul(buf, 0, &timeout);
+	if (err)
+		return err;
+
+	engine->props.preempt_timeout = timeout;
+	return count;
+}
+
 static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
 static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL);
 static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL);
 static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL);
+static struct kobj_attribute preempt_timeout_attr =
+__ATTR(preempt_timeout_ms, 0600, preempt_timeout_show, preempt_timeout_store);
 
 static void kobj_engine_release(struct kobject *kobj)
 {
@@ -109,6 +136,11 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		if (sysfs_create_files(kobj, files))
 			goto err_engine;
 
+		if (CONFIG_DRM_I915_PREEMPT_TIMEOUT &&
+		    intel_engine_has_preemption(engine) &&
+		    sysfs_create_file(kobj, &preempt_timeout_attr.attr))
+			goto err_engine;
+
 		if (0) {
 err_engine:
 			dev_err(kdev, "Failed to add sysfs engine '%s'\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 943f0663837e..61d271668948 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -173,6 +173,11 @@ struct intel_engine_execlists {
 	 */
 	struct timer_list timer;
 
+	/**
+	 * @preempt: reset the current context if it fails to give way
+	 */
+	struct timer_list preempt;
+
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
 	 */
@@ -539,6 +544,10 @@ struct intel_engine_cs {
 		 */
 		ktime_t total;
 	} stats;
+
+	struct {
+		unsigned long preempt_timeout;
+	} props;
 };
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 431d3b8c3371..7741d78139ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1394,6 +1394,29 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
 }
 
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+	unsigned long timeout;
+
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return;
+
+	timeout = READ_ONCE(engine->props.preempt_timeout);
+	if (!timeout)
+		return;
+
+	timeout = msecs_to_jiffies_timeout(timeout);
+	/*
+	 * Paranoia to make sure the compiler computes the timeout before
+	 * loading 'jiffies' as jiffies is volatile and may be updated in
+	 * the background by a timer tick. All to reduce the complexity
+	 * of the addition and reduce the risk of losing a jiffie.
+	 */
+	barrier();
+
+	mod_timer(&engine->execlists.preempt, jiffies + timeout);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1743,6 +1766,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists->switch_priority_hint =
 			switch_prio(engine, *execlists->pending);
 		execlists_submit_ports(engine);
+		set_preempt_timeout(engine);
 	} else {
 		ring_set_paused(engine, 0);
 	}
@@ -1970,6 +1994,38 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 		execlists_dequeue(engine);
 }
 
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->gt->reset.flags;
+
+	if (i915_modparams.reset < 3)
+		return;
+
+	if (test_and_set_bit(bit, lock))
+		return;
+
+	/* Mark this tasklet as disabled to avoid waiting for it to complete */
+	tasklet_disable_nosync(&engine->execlists.tasklet);
+
+	intel_engine_reset(engine, "preemption time out");
+
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(struct intel_engine_cs *const engine)
+{
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+		return false;
+
+	if (!intel_engine_has_preemption(engine))
+		return false;
+
+	return !timer_pending(&engine->execlists.preempt) &&
+		READ_ONCE(engine->execlists.pending[0]);
+}
+
 /*
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
@@ -1977,23 +2033,39 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
-	unsigned long flags;
+	bool timeout = preempt_timeout(engine);
 
 	process_csb(engine);
-	if (!READ_ONCE(engine->execlists.pending[0])) {
+	if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+		unsigned long flags;
+
 		spin_lock_irqsave(&engine->active.lock, flags);
 		__execlists_submission_tasklet(engine);
 		spin_unlock_irqrestore(&engine->active.lock, flags);
+
+		/* Recheck after serialising with direct-submission */
+		if (timeout && preempt_timeout(engine))
+			preempt_reset(engine);
 	}
 }
 
-static void execlists_submission_timer(struct timer_list *timer)
+static void __execlists_kick(struct intel_engine_execlists *execlists)
 {
-	struct intel_engine_cs *engine =
-		from_timer(engine, timer, execlists.timer);
-
 	/* Kick the tasklet for some interrupt coalescing and reset handling */
-	tasklet_hi_schedule(&engine->execlists.tasklet);
+	tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+	__execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+	execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+	execlists_kick(timer, preempt);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -3417,6 +3489,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 static void execlists_park(struct intel_engine_cs *engine)
 {
 	del_timer(&engine->execlists.timer);
+	del_timer(&engine->execlists.preempt);
 }
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -3534,7 +3607,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 {
 	tasklet_init(&engine->execlists.tasklet,
 		     execlists_submission_tasklet, (unsigned long)engine);
-	timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
+	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+	timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
 
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index d29ade3b7de6..56058978bb27 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -61,7 +61,7 @@ struct drm_printer;
 	param(char *, dmc_firmware_path, NULL) \
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \
 	param(int, edp_vswing, 0) \
-	param(int, reset, 2) \
+	param(int, reset, 3) \
 	param(unsigned int, inject_load_failure, 0) \
 	param(int, fastboot, -1) \
 	param(int, enable_dpcd_backlight, 0) \
-- 
2.23.0


* [PATCH 18/30] drm/i915: Mark up "sentinel" requests
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (15 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 17/30] drm/i915/execlists: Force preemption Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
                   ` (17 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Sometimes we want to emit a terminator request, a request that flushes
the pipeline and allows no request to come after it. This can be used
for a "preempt-to-idle" to ensure that upon processing the
context-switch to that request, all other active contexts have been
flushed.
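
A sketch of the intended usage, assuming an already pinned kernel
context (the preemptive pulse in a later patch is the real consumer):

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* nothing may be coalesced alongside or submitted after a sentinel */
	rq->flags |= I915_REQUEST_SENTINEL;
	i915_request_add(rq);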

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c |  6 ++++++
 drivers/gpu/drm/i915/i915_request.h | 10 ++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7741d78139ab..52e5e4de1fc6 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1233,6 +1233,9 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
+	if (i915_request_has_sentinel(prev))
+		return false;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
@@ -1704,6 +1707,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->hw_context == rq->hw_context)
 					goto done;
 
+				if (i915_request_has_sentinel(last))
+					goto done;
+
 				/*
 				 * If GVT overrides us we only ever submit
 				 * port[0], leaving port[1] empty. Note that we
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 6a95242b280d..96991d64759c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -216,8 +216,9 @@ struct i915_request {
 	unsigned long emitted_jiffies;
 
 	unsigned long flags;
-#define I915_REQUEST_WAITBOOST BIT(0)
-#define I915_REQUEST_NOPREEMPT BIT(1)
+#define I915_REQUEST_WAITBOOST	BIT(0)
+#define I915_REQUEST_NOPREEMPT	BIT(1)
+#define I915_REQUEST_SENTINEL	BIT(2)
 
 	/** timeline->request entry for this request */
 	struct list_head link;
@@ -440,6 +441,11 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
 	return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
 }
 
+static inline bool i915_request_has_sentinel(const struct i915_request *rq)
+{
+	return unlikely(rq->flags & I915_REQUEST_SENTINEL);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {
-- 
2.23.0


* [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (16 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 18/30] drm/i915: Mark up "sentinel" requests Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
                   ` (16 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

On completion of a banned context, scrub the context image so that we do
not replay the active payload. The intent is that we skip banned
payloads on request submission so that the timeline advancement
continues on in the background. However, if we are returning to a
preempted request, i915_request_skip() is ineffective and instead we
need to patch up the context image so that it continues from the start
of the next request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 58 +++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 52e5e4de1fc6..6dcceaf02e00 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     const struct intel_engine_cs *engine,
 				     const struct intel_ring *ring,
 				     bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+			     const struct intel_engine_cs *engine);
 
 static void __context_pin_acquire(struct intel_context *ce)
 {
@@ -1022,6 +1025,58 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 		tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static void
+mark_complete(struct i915_request *rq, struct intel_engine_cs *engine)
+{
+	const struct intel_timeline * const tl = rcu_dereference(rq->timeline);
+
+	*(u32 *)tl->hwsp_seqno = rq->fence.seqno;
+	GEM_BUG_ON(!i915_request_completed(rq));
+
+	list_for_each_entry_from_reverse(rq, &tl->requests, link) {
+		if (i915_request_signaled(rq))
+			break;
+
+		mark_eio(rq);
+	}
+
+	intel_engine_queue_breadcrumbs(engine);
+}
+
+static void cancel_active(struct i915_request *rq,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context * const ce = rq->hw_context;
+	u32 *regs = ce->lrc_reg_state;
+
+	if (i915_request_completed(rq))
+		return;
+
+	GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+		  __func__, engine->name, rq->fence.context, rq->fence.seqno);
+	__context_pin_acquire(ce);
+
+	/* Scrub the context image to prevent replaying the previous batch */
+	memcpy(regs, /* skip restoring the vanilla PPHWSP */
+	       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+	       engine->context_size - PAGE_SIZE);
+	execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+
+	/* Ring will be advanced on retire; here we need to reset the context */
+	ce->ring->head = intel_ring_wrap(ce->ring, rq->wa_tail);
+	__execlists_update_reg_state(ce, engine);
+
+	/* We've switched away, so this should be a no-op, but intent matters */
+	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+
+	/* Let everyone know that the request may now be retired */
+	rcu_read_lock();
+	mark_complete(rq, engine);
+	rcu_read_unlock();
+
+	__context_pin_release(ce);
+}
+
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
 			 struct intel_engine_cs * const engine)
@@ -1032,6 +1087,9 @@ __execlists_schedule_out(struct i915_request *rq,
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put(engine->gt);
 
+	if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+		cancel_active(rq, engine);
+
 	/*
 	 * If this is part of a virtual engine, its next request may
 	 * have been blocked waiting for access to the active context.
-- 
2.23.0


* [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (17 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 13:52   ` Bloomfield, Jon
  2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
  2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
                   ` (15 subsequent siblings)
  34 siblings, 2 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Normally, we rely on our hangcheck to prevent persistent batches from
hogging the GPU. However, if the user disables hangcheck, this mechanism
breaks down. Despite our insistence that this is unsafe, the users are
equally insistent that they want to use endless batches and will disable
the hangcheck mechanism. We are looking at perhaps replacing hangcheck
with a softer mechanism, that sends a pulse down the engine to check if
it is well. We can use the same preemptive pulse to flush an active
persistent context off the GPU upon context close, preventing resources
being lost and unkillable requests remaining on the GPU after process
termination. To avoid changing the ABI and accidentally breaking
existing userspace, we make the persistence of a context explicit and
enable it by default (matching current ABI). Userspace can opt out of
persistent mode (forcing requests to be cancelled when the context is
closed, either explicitly or by process termination) via a context parameter. To
facilitate existing use-cases of disabling hangcheck, if the modparam is
disabled (i915.enable_hangcheck=0), we disable persistence mode by
default.  (Note, one of the outcomes for supporting endless mode will be
the removal of hangchecking, at which point opting into persistent mode
will be mandatory, or perhaps the default, controlled by cgroups.)
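
From userspace, opting out is a single SETPARAM; a sketch against the
uapi addition below (ctx_id and i915_fd assumed to be in scope):

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PERSISTENCE,
		.value = 0, /* cancel our requests when the context closes */
	};

	/* fails with -ENODEV if the engines cannot preempt-to-idle */
	err = ioctl(i915_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);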

Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
 drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
 include/uapi/drm/i915_drm.h                   |  15 +++
 10 files changed, 228 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1f87c70a2842..561fa2bb3006 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,6 +76,7 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
 	gt/intel_engine_pool.o \
 	gt/intel_engine_sysfs.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0ab416887fc2..e832238a72e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
@@ -375,6 +376,78 @@ void i915_gem_context_release(struct kref *ref)
 		queue_work(i915->wq, &i915->contexts.free_work);
 }
 
+static inline struct i915_gem_engines *
+__context_engines_static(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	intel_engine_mask_t tmp, active, reset;
+	struct intel_gt *gt = &ctx->i915->gt;
+	struct i915_gem_engines_iter it;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	/*
+	 * If we are already banned, it was due to a guilty request causing
+	 * a reset and the entire context being evicted from the GPU.
+	 */
+	if (i915_gem_context_is_banned(ctx))
+		return;
+
+	i915_gem_context_set_banned(ctx);
+
+	/*
+	 * Map the user's engine back to the actual engines; one virtual
+	 * engine will be mapped to multiple engines, and using ctx->engine[]
+	 * the same engine may have multiple instances in the user's map.
+	 * However, we only care about pending requests, so only include
+	 * engines on which there are incomplete requests.
+	 */
+	active = 0;
+	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+		struct dma_fence *fence;
+
+		if (!ce->timeline)
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (!fence)
+			continue;
+
+		engine = to_request(fence)->engine;
+		if (HAS_EXECLISTS(gt->i915))
+			engine = intel_context_inflight(ce);
+		if (engine)
+			active |= engine->mask;
+
+		dma_fence_put(fence);
+	}
+
+	/*
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 */
+	reset = 0;
+	for_each_engine_masked(engine, gt->i915, active, tmp)
+		if (intel_engine_pulse(engine))
+			reset |= engine->mask;
+
+	/*
+	 * If we are unable to send a preemptive pulse to bump
+	 * the context from the GPU, we have to resort to a full
+	 * reset. We hope the collateral damage is worth it.
+	 */
+	if (reset)
+		intel_gt_handle_error(gt, reset, 0,
+				      "context closure in %s", ctx->name);
+}
+
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -400,6 +473,10 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
+
+	if (!i915_gem_context_is_persistent(ctx))
+		kill_context(ctx);
+
 	i915_gem_context_put(ctx);
 }
 
@@ -440,6 +517,21 @@ __create_context(struct drm_i915_private *i915)
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
 
+	/*
+	 * If the user has disabled hangchecking, we can not be sure that
+	 * the batches will ever complete after the context is closed,
+	 * keep the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 *
+	 * Note that the user may change the value of the modparam between
+	 * context creation and close, we choose to ignore this for the
+	 * sake of determinism and expect the user to set the parameter
+	 * on module load and never touch it again.
+	 */
+	if (i915_modparams.enable_hangcheck) /* cgroup hook? */
+		i915_gem_context_set_persistence(ctx);
+
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
@@ -597,6 +689,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -1724,6 +1817,26 @@ get_engines(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int
+set_persistence(struct i915_gem_context *ctx,
+		const struct drm_i915_gem_context_param *args)
+{
+	if (args->size)
+		return -EINVAL;
+
+	if (args->value) {
+		i915_gem_context_set_persistence(ctx);
+		return 0;
+	}
+
+	/* To cancel a context we use "preempt-to-idle" */
+	if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+		return -ENODEV;
+
+	i915_gem_context_clear_persistence(ctx);
+	return 0;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1801,6 +1914,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		ret = set_persistence(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2250,6 +2367,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		args->size = 0;
+		args->value = i915_gem_context_is_persistent(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..e0f5b6c6a331 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -74,6 +74,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..daf1ea5075a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -137,6 +137,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
 #define UCONTEXT_RECOVERABLE		3
+#define UCONTEXT_PERSISTENCE		4
 
 	/**
 	 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..0d1a275ec316 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	i915_gem_context_set_persistence(ctx);
+
 	mutex_init(&ctx->engines_mutex);
 	e = default_engines(ctx);
 	if (IS_ERR(e))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..2fc413f9d506
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+	i915_request_add_active_barriers(rq);
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (!intel_engine_has_preemption(engine))
+		return -ENODEV;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	if (mutex_lock_interruptible(&ce->timeline->mutex))
+		goto out_rpm;
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unlock;
+	}
+
+	rq->flags |= I915_REQUEST_SENTINEL;
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+out_unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+	intel_engine_pm_put(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..b950451b5998
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8e5e513eddc9..86c0b2280b7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
 	/* Release our exclusive hold on the engine */
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 21037a2e2038..ae8bb3cb627e 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -39,6 +39,7 @@ enum {
  * active request.
  */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..eb9e704d717a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1565,6 +1565,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.23.0


* [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (18 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 13:55   ` Bloomfield, Jon
  2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
                   ` (14 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Replace sampling the engine state every so often with a periodic
heartbeat request to measure the health of an engine. This is coupled
with forced preemption to allow long-running requests to survive so
long as they do not block other users.

The heartbeat interval can be adjusted per-engine using,

	/sys/class/drm/card0/engine/*/heartbeat_interval_ms
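
As a hedged userspace sketch (needs root), disabling the heartbeat, and
with it automatic hang detection, is a matter of writing 0 to each node:

    #include <glob.h>
    #include <stdio.h>

    int main(void)
    {
            glob_t g;
            size_t i;

            if (glob("/sys/class/drm/card*/engine/*/heartbeat_interval_ms",
                     0, NULL, &g))
                    return 1;

            for (i = 0; i < g.gl_pathc; i++) {
                    FILE *f = fopen(g.gl_pathv[i], "w");

                    if (f) {
                            fputs("0", f); /* 0 disables the heartbeat */
                            fclose(f);
                    }
            }

            globfree(&g);
            return 0;
    }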

v2: Couple in sysfs controls

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile          |  14 +
 drivers/gpu/drm/i915/Makefile                 |   1 -
 drivers/gpu/drm/i915/display/intel_display.c  |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   1 -
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   2 -
 drivers/gpu/drm/i915/gt/intel_engine.h        |  32 --
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  11 +-
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 115 ++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |   5 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   5 +-
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c  |  29 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  17 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |   1 -
 drivers/gpu/drm/i915/gt/intel_gt.h            |   4 -
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   1 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   9 -
 drivers/gpu/drm/i915/gt/intel_hangcheck.c     | 361 ------------------
 drivers/gpu/drm/i915/gt/intel_reset.c         |   3 +-
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |   4 -
 drivers/gpu/drm/i915/i915_debugfs.c           |  87 -----
 drivers/gpu/drm/i915/i915_drv.c               |   3 -
 drivers/gpu/drm/i915/i915_drv.h               |   1 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |  33 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   2 -
 drivers/gpu/drm/i915/i915_priolist_types.h    |   6 +
 25 files changed, 194 insertions(+), 555 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/gt/intel_hangcheck.c

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 3db4ca5492a9..87f2b063b1f3 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -40,3 +40,17 @@ config DRM_I915_PREEMPT_TIMEOUT
 	  /sys/class/drm/card0/engine/*/preempt_timeout_ms
 
 	  May be 0 to disable the timeout.
+
+config DRM_I915_HEARTBEAT_INTERVAL
+	int "Interval between heartbeat pulses (ms)"
+	default 2500 # milliseconds
+	help
+	  While active, the driver uses a periodic request, a heartbeat, to
+	  check the wellness of the GPU and to regularly flush state changes
+	  (idle barriers).
+
+	  This is adjustable via
+	  /sys/class/drm/card0/engine/*/heartbeat_interval_ms
+
+	  May be 0 to disable heartbeats and therefore disable automatic GPU
+	  hang detection.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 561fa2bb3006..eab4a75b5255 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -86,7 +86,6 @@ gt-y += \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
 	gt/intel_gt_requests.o \
-	gt/intel_hangcheck.o \
 	gt/intel_lrc.o \
 	gt/intel_rc6.o \
 	gt/intel_renderstate.o \
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c64c7045de28..cbc525103c69 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -14342,7 +14342,7 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
 static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 {
 	struct i915_sched_attr attr = {
-		.priority = I915_PRIORITY_DISPLAY,
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY),
 	};
 
 	i915_gem_object_wait_priority(obj, 0, &attr);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a70fbbd8c98d..74d59e5404b4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -435,6 +435,5 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
 				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 7c316d4633db..b32088a93232 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -100,8 +100,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_gt_suspend(&i915->gt);
 	intel_uc_suspend(&i915->gt.uc);
 
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
-
 	i915_gem_drain_freed_objects(i915);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c9e8c8ccbd47..dcd1cb5efaa7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -89,38 +89,6 @@ struct drm_printer;
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-enum intel_engine_hangcheck_action {
-	ENGINE_IDLE = 0,
-	ENGINE_WAIT,
-	ENGINE_ACTIVE_SEQNO,
-	ENGINE_ACTIVE_HEAD,
-	ENGINE_ACTIVE_SUBUNITS,
-	ENGINE_WAIT_KICK,
-	ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
-	switch (a) {
-	case ENGINE_IDLE:
-		return "idle";
-	case ENGINE_WAIT:
-		return "wait";
-	case ENGINE_ACTIVE_SEQNO:
-		return "active seqno";
-	case ENGINE_ACTIVE_HEAD:
-		return "active head";
-	case ENGINE_ACTIVE_SUBUNITS:
-		return "active subunits";
-	case ENGINE_WAIT_KICK:
-		return "wait kick";
-	case ENGINE_DEAD:
-		return "dead";
-	}
-
-	return "unknown";
-}
 
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index dfdffaf35678..fcaf5e69467c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -305,6 +305,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	__sprint_engine_name(engine);
 
 	engine->props.preempt_timeout = CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+	engine->props.heartbeat_interval = CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
 
 	/*
 	 * To be overridden by the backend on setup. However to facilitate
@@ -599,7 +600,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
-	intel_engine_init_hangcheck(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
 
@@ -1418,8 +1418,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
-	drm_printf(m, "\tHangcheck: %d ms ago\n",
-		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+
+	rcu_read_lock();
+	rq = READ_ONCE(engine->heartbeat.systole);
+	if (rq)
+		drm_printf(m, "\tHeartbeat: %d ms ago\n",
+			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+	rcu_read_unlock();
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 2fc413f9d506..f68acf9118f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -11,6 +11,27 @@
 #include "intel_engine_pm.h"
 #include "intel_engine.h"
 #include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that it restores progress.
+ */
+
+static void next_heartbeat(struct intel_engine_cs *engine)
+{
+	long delay;
+
+	delay = READ_ONCE(engine->props.heartbeat_interval);
+	if (!delay)
+		return;
+
+	delay = msecs_to_jiffies_timeout(delay);
+	schedule_delayed_work(&engine->heartbeat.work,
+			      round_jiffies_up_relative(delay));
+}
 
 static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 {
@@ -18,6 +39,100 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 	i915_request_add_active_barriers(rq);
 }
 
+static void heartbeat(struct work_struct *wrk)
+{
+	struct i915_sched_attr attr = {
+		.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+	};
+	struct intel_engine_cs *engine =
+		container_of(wrk, typeof(*engine), heartbeat.work.work);
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return;
+
+	rq = engine->heartbeat.systole;
+	if (rq && i915_request_completed(rq)) {
+		i915_request_put(rq);
+		engine->heartbeat.systole = NULL;
+	}
+
+	if (intel_gt_is_wedged(engine->gt))
+		goto out;
+
+	if (engine->heartbeat.systole) {
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+			struct drm_printer p = drm_debug_printer(__func__);
+
+			intel_engine_dump(engine, &p,
+					  "%s heartbeat not ticking\n",
+					  engine->name);
+		}
+
+		if (engine->schedule &&
+		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+			attr.priority =
+				I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+			if (rq->sched.attr.priority >= attr.priority)
+				attr.priority = I915_PRIORITY_BARRIER;
+
+			local_bh_disable();
+			engine->schedule(rq, &attr);
+			local_bh_enable();
+		} else {
+			intel_gt_handle_error(engine->gt, engine->mask,
+					      I915_ERROR_CAPTURE,
+					      "stopped heartbeat on %s",
+					      engine->name);
+		}
+		goto out;
+	}
+
+	if (engine->wakeref_serial == engine->serial)
+		goto out;
+
+	mutex_lock(&ce->timeline->mutex);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq))
+		goto unlock;
+
+	idle_pulse(engine, rq);
+	if (i915_modparams.enable_hangcheck)
+		engine->heartbeat.systole = i915_request_get(rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out:
+	next_heartbeat(engine);
+	intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+		return;
+
+	next_heartbeat(engine);
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+	cancel_delayed_work(&engine->heartbeat.work);
+	i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+	INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
+}
+
 int intel_engine_pulse(struct intel_engine_cs *engine)
 {
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
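
To summarise the ladder the heartbeat() worker above implements, a
simplified, illustrative model (not driver code): a pulse is emitted at
minimum priority, promoted to I915_PRIORITY_HEARTBEAT on the next beat,
then to I915_PRIORITY_BARRIER, and if it still has not completed by the
following beat the engine is declared hung and reset.

#include <errno.h>
#include <stdbool.h>

enum beat_step { BEAT_NONE, BEAT_MIN, BEAT_HEARTBEAT, BEAT_BARRIER };

/*
 * One tick of the (hypothetical) escalation model: returns 0 while we
 * are still waiting or escalating, -EIO once the engine should be reset.
 */
static int beat_tick(enum beat_step *step, bool pulse_completed)
{
	if (pulse_completed) {
		*step = BEAT_NONE;	/* healthy: start over next interval */
		return 0;
	}

	switch (*step) {
	case BEAT_NONE:
		*step = BEAT_MIN;	/* emit pulse at minimum priority */
		return 0;
	case BEAT_MIN:
		*step = BEAT_HEARTBEAT;	/* ask for preemption */
		return 0;
	case BEAT_HEARTBEAT:
		*step = BEAT_BARRIER;	/* maximum priority, last chance */
		return 0;
	default:
		return -EIO;		/* stuck: declare hang and reset */
	}
}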
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index b950451b5998..39391004554d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -9,6 +9,11 @@
 
 struct intel_engine_cs;
 
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
 int intel_engine_pulse(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 86c0b2280b7b..392dc1b9f04e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -7,6 +7,7 @@
 #include "i915_drv.h"
 
 #include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_gt.h"
@@ -34,7 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	if (engine->unpark)
 		engine->unpark(engine);
 
-	intel_engine_init_hangcheck(engine);
+	intel_engine_unpark_heartbeat(engine);
 	return 0;
 }
 
@@ -158,6 +159,7 @@ static int __engine_park(struct intel_wakeref *wf)
 
 	call_idle_barriers(engine); /* cleanup after wedging */
 
+	intel_engine_park_heartbeat(engine);
 	intel_engine_disarm_breadcrumbs(engine);
 	intel_engine_pool_park(&engine->pool);
 
@@ -188,6 +190,7 @@ void intel_engine_init__pm(struct intel_engine_cs *engine)
 	struct intel_runtime_pm *rpm = &engine->i915->runtime_pm;
 
 	intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+	intel_engine_init_heartbeat(engine);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index aac26097c916..8532f9cdc885 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -70,12 +70,38 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
 	return count;
 }
 
+static ssize_t
+heartbeat_interval_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", engine->props.heartbeat_interval);
+}
+
+static ssize_t
+heartbeat_interval_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long delay;
+	int err;
+
+	err = kstrtoul(buf, 0, &delay);
+	if (err)
+		return err;
+
+	engine->props.heartbeat_interval = delay;
+	return count;
+}
+
 static struct kobj_attribute name_attr = __ATTR(name, 0444, name_show, NULL);
 static struct kobj_attribute class_attr = __ATTR(class, 0444, class_show, NULL);
 static struct kobj_attribute inst_attr = __ATTR(instance, 0444, inst_show, NULL);
 static struct kobj_attribute mmio_attr = __ATTR(mmio_base, 0444, mmio_show, NULL);
 static struct kobj_attribute preempt_timeout_attr =
 __ATTR(preempt_timeout_ms, 0600, preempt_timeout_show, preempt_timeout_store);
+static struct kobj_attribute heartbeat_interval_attr =
+__ATTR(heartbeat_interval_ms, 0600, heartbeat_interval_show, heartbeat_interval_store);
 
 static void kobj_engine_release(struct kobject *kobj)
 {
@@ -115,6 +141,9 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		&class_attr.attr,
 		&inst_attr.attr,
 		&mmio_attr.attr,
+#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
+		&heartbeat_interval_attr.attr,
+#endif
 		NULL
 	};
 
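
The new control is exposed per engine as heartbeat_interval_ms. A
minimal userspace sketch for adjusting it follows; the sysfs path is an
assumption for card0's rcs0, adjust for your device and engine:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/class/drm/card0/engine/rcs0/heartbeat_interval_ms";
	const char *val = "2500\n";	/* interval in milliseconds */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, val, strlen(val)) < 0)
		perror("write");
	close(fd);
	return 0;
}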
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 61d271668948..25af66540a1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -15,6 +15,7 @@
 #include <linux/rbtree.h>
 #include <linux/timer.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 
 #include "i915_gem.h"
 #include "i915_pmu.h"
@@ -76,14 +77,6 @@ struct intel_instdone {
 	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
 };
 
-struct intel_engine_hangcheck {
-	u64 acthd;
-	u32 last_ring;
-	u32 last_head;
-	unsigned long action_timestamp;
-	struct intel_instdone instdone;
-};
-
 struct intel_ring {
 	struct kref ref;
 	struct i915_vma *vma;
@@ -328,6 +321,11 @@ struct intel_engine_cs {
 
 	intel_engine_mask_t saturated; /* submitting semaphores too late? */
 
+	struct {
+		struct delayed_work work;
+		struct i915_request *systole;
+	} heartbeat;
+
 	unsigned long serial;
 
 	unsigned long wakeref_serial;
@@ -478,8 +476,6 @@ struct intel_engine_cs {
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
-	struct intel_engine_hangcheck hangcheck;
-
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
@@ -547,6 +543,7 @@ struct intel_engine_cs {
 
 	struct {
 		unsigned long preempt_timeout;
+		unsigned long heartbeat_interval;
 	} props;
 };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 53220741e49e..4e9e45f834f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -22,7 +22,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
-	intel_gt_init_hangcheck(gt);
 	intel_gt_init_reset(gt);
 	intel_gt_init_requests(gt);
 	intel_gt_pm_init_early(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index e6ab0bff0efb..5b6effed3713 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -46,8 +46,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt,
 void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
 void intel_gt_chipset_flush(struct intel_gt *gt);
 
-void intel_gt_init_hangcheck(struct intel_gt *gt);
-
 static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
 					  enum intel_gt_scratch_field field)
 {
@@ -59,6 +57,4 @@ static inline bool intel_gt_is_wedged(struct intel_gt *gt)
 	return __intel_reset_failed(&gt->reset);
 }
 
-void intel_gt_queue_hangcheck(struct intel_gt *gt);
-
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index b52e2ba3d092..ba31f1c9e0dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -52,7 +52,6 @@ static int __gt_unpark(struct intel_wakeref *wf)
 
 	i915_pmu_gt_unparked(i915);
 
-	intel_gt_queue_hangcheck(gt);
 	intel_gt_unpark_requests(gt);
 
 	pm_notify(gt, INTEL_GT_UNPARK);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 802f516a3430..59f8ee0aa151 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -26,14 +26,6 @@ struct i915_ggtt;
 struct intel_engine_cs;
 struct intel_uncore;
 
-struct intel_hangcheck {
-	/* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
-	struct delayed_work work;
-};
-
 struct intel_gt {
 	struct drm_i915_private *i915;
 	struct intel_uncore *uncore;
@@ -67,7 +59,6 @@ struct intel_gt {
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
 
-	struct intel_hangcheck hangcheck;
 	struct intel_reset reset;
 
 	/**
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 9814b18b32ad..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "intel_engine.h"
-#include "intel_gt.h"
-#include "intel_reset.h"
-
-struct hangcheck {
-	u64 acthd;
-	u32 ring;
-	u32 head;
-	enum intel_engine_hangcheck_action action;
-	unsigned long action_timestamp;
-	int deadlock;
-	struct intel_instdone instdone;
-	bool wedged:1;
-	bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
-	u32 tmp = current_instdone | *old_instdone;
-	bool unchanged;
-
-	unchanged = tmp == *old_instdone;
-	*old_instdone |= tmp;
-
-	return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	struct intel_instdone instdone;
-	struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
-	bool stuck;
-	int slice;
-	int subslice;
-
-	intel_engine_get_instdone(engine, &instdone);
-
-	/* There might be unstable subunit states even when
-	 * actual head is not moving. Filter out the unstable ones by
-	 * accumulating the undone -> done transitions and only
-	 * consider those as progress.
-	 */
-	stuck = instdone_unchanged(instdone.instdone,
-				   &accu_instdone->instdone);
-	stuck &= instdone_unchanged(instdone.slice_common,
-				    &accu_instdone->slice_common);
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
-		stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
-					    &accu_instdone->sampler[slice][subslice]);
-		stuck &= instdone_unchanged(instdone.row[slice][subslice],
-					    &accu_instdone->row[slice][subslice]);
-	}
-
-	return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	if (acthd != engine->hangcheck.acthd) {
-
-		/* Clear subunit states on head movement */
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		return ENGINE_ACTIVE_HEAD;
-	}
-
-	if (!subunits_stuck(engine))
-		return ENGINE_ACTIVE_SUBUNITS;
-
-	return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
-	enum intel_engine_hangcheck_action ha;
-	u32 tmp;
-
-	ha = head_stuck(engine, acthd);
-	if (ha != ENGINE_DEAD)
-		return ha;
-
-	if (IS_GEN(engine->i915, 2))
-		return ENGINE_DEAD;
-
-	/* Is the chip hanging on a WAIT_FOR_EVENT?
-	 * If so we can simply poke the RB_WAIT bit
-	 * and break the hang. This should work on
-	 * all but the second generation chipsets.
-	 */
-	tmp = ENGINE_READ(engine, RING_CTL);
-	if (tmp & RING_WAIT) {
-		intel_gt_handle_error(engine->gt, engine->mask, 0,
-				      "stuck wait on %s", engine->name);
-		ENGINE_WRITE(engine, RING_CTL, tmp);
-		return ENGINE_WAIT_KICK;
-	}
-
-	return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
-				  struct hangcheck *hc)
-{
-	hc->acthd = intel_engine_get_active_head(engine);
-	hc->ring = ENGINE_READ(engine, RING_START);
-	hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
-				   const struct hangcheck *hc)
-{
-	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.last_ring = hc->ring;
-	engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
-		     const struct hangcheck *hc)
-{
-	if (intel_engine_is_idle(engine))
-		return ENGINE_IDLE;
-
-	if (engine->hangcheck.last_ring != hc->ring)
-		return ENGINE_ACTIVE_SEQNO;
-
-	if (engine->hangcheck.last_head != hc->head)
-		return ENGINE_ACTIVE_SEQNO;
-
-	return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
-					struct hangcheck *hc)
-{
-	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
-	hc->action = hangcheck_get_action(engine, hc);
-
-	/* We always increment the progress
-	 * if the engine is busy and still processing
-	 * the same request, so that no single request
-	 * can run indefinitely (such as a chain of
-	 * batches). The only time we do not increment
-	 * the hangcheck score on this ring, if this
-	 * engine is in a legitimate wait for another
-	 * engine. In that case the waiting engine is a
-	 * victim and we want to be sure we catch the
-	 * right culprit. Then every time we do kick
-	 * the ring, make it as a progress as the seqno
-	 * advancement might ensure and if not, it
-	 * will catch the hanging engine.
-	 */
-
-	switch (hc->action) {
-	case ENGINE_IDLE:
-	case ENGINE_ACTIVE_SEQNO:
-		/* Clear head and subunit states on seqno movement */
-		hc->acthd = 0;
-
-		memset(&engine->hangcheck.instdone, 0,
-		       sizeof(engine->hangcheck.instdone));
-
-		/* Intentional fall through */
-	case ENGINE_WAIT_KICK:
-	case ENGINE_WAIT:
-		engine->hangcheck.action_timestamp = jiffies;
-		break;
-
-	case ENGINE_ACTIVE_HEAD:
-	case ENGINE_ACTIVE_SUBUNITS:
-		/*
-		 * Seqno stuck with still active engine gets leeway,
-		 * in hopes that it is just a long shader.
-		 */
-		timeout = I915_SEQNO_DEAD_TIMEOUT;
-		break;
-
-	case ENGINE_DEAD:
-		break;
-
-	default:
-		MISSING_CASE(hc->action);
-	}
-
-	hc->stalled = time_after(jiffies,
-				 engine->hangcheck.action_timestamp + timeout);
-	hc->wedged = time_after(jiffies,
-				 engine->hangcheck.action_timestamp +
-				 I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct intel_gt *gt,
-				   intel_engine_mask_t hung,
-				   intel_engine_mask_t stuck)
-{
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-	char msg[80];
-	int len;
-
-	/* If some rings hung but others were still busy, only
-	 * blame the hanging rings in the synopsis.
-	 */
-	if (stuck != hung)
-		hung &= ~stuck;
-	len = scnprintf(msg, sizeof(msg),
-			"%s on ", stuck == hung ? "no progress" : "hang");
-	for_each_engine_masked(engine, gt->i915, hung, tmp)
-		len += scnprintf(msg + len, sizeof(msg) - len,
-				 "%s, ", engine->name);
-	msg[len-2] = '\0';
-
-	return intel_gt_handle_error(gt, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void hangcheck_elapsed(struct work_struct *work)
-{
-	struct intel_gt *gt =
-		container_of(work, typeof(*gt), hangcheck.work.work);
-	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-
-	if (!i915_modparams.enable_hangcheck)
-		return;
-
-	if (!READ_ONCE(gt->awake))
-		return;
-
-	if (intel_gt_is_wedged(gt))
-		return;
-
-	wakeref = intel_runtime_pm_get_if_in_use(&gt->i915->runtime_pm);
-	if (!wakeref)
-		return;
-
-	/* As enabling the GPU requires fairly extensive mmio access,
-	 * periodically arm the mmio checker to see if we are triggering
-	 * any invalid access.
-	 */
-	intel_uncore_arm_unclaimed_mmio_detection(gt->uncore);
-
-	for_each_engine(engine, gt->i915, id) {
-		struct hangcheck hc;
-
-		intel_engine_breadcrumbs_irq(engine);
-
-		hangcheck_load_sample(engine, &hc);
-		hangcheck_accumulate_sample(engine, &hc);
-		hangcheck_store_sample(engine, &hc);
-
-		if (hc.stalled) {
-			hung |= engine->mask;
-			if (hc.action != ENGINE_DEAD)
-				stuck |= engine->mask;
-		}
-
-		if (hc.wedged)
-			wedged |= engine->mask;
-	}
-
-	if (GEM_SHOW_DEBUG() && (hung | stuck)) {
-		struct drm_printer p = drm_debug_printer("hangcheck");
-
-		for_each_engine(engine, gt->i915, id) {
-			if (intel_engine_is_idle(engine))
-				continue;
-
-			intel_engine_dump(engine, &p, "%s\n", engine->name);
-		}
-	}
-
-	if (wedged) {
-		dev_err(gt->i915->drm.dev,
-			"GPU recovery timed out,"
-			" cancelling all in-flight rendering.\n");
-		GEM_TRACE_DUMP();
-		intel_gt_set_wedged(gt);
-	}
-
-	if (hung)
-		hangcheck_declare_hang(gt, hung, stuck);
-
-	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
-
-	/* Reset timer in case GPU hangs without another request being added */
-	intel_gt_queue_hangcheck(gt);
-}
-
-void intel_gt_queue_hangcheck(struct intel_gt *gt)
-{
-	unsigned long delay;
-
-	if (unlikely(!i915_modparams.enable_hangcheck))
-		return;
-
-	/*
-	 * Don't continually defer the hangcheck so that it is always run at
-	 * least once after work has been scheduled on any ring. Otherwise,
-	 * we will ignore a hung ring if a second ring is kept busy.
-	 */
-
-	delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
-	queue_delayed_work(system_long_wq, &gt->hangcheck.work, delay);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
-	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
-	engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_gt_init_hangcheck(struct intel_gt *gt)
-{
-	INIT_DELAYED_WORK(&gt->hangcheck.work, hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 055496f0825f..806b668d89aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1018,8 +1018,6 @@ void intel_gt_reset(struct intel_gt *gt,
 	if (ret)
 		goto taint;
 
-	intel_gt_queue_hangcheck(gt);
-
 finish:
 	reset_finish(gt, awake);
 unlock:
@@ -1351,4 +1349,5 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_reset.c"
+#include "selftest_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index ffbb3d23b887..b0ca30220193 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1681,7 +1681,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	};
 	struct intel_gt *gt = &i915->gt;
 	intel_wakeref_t wakeref;
-	bool saved_hangcheck;
 	int err;
 
 	if (!intel_has_gpu_reset(gt))
@@ -1691,12 +1690,9 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 		return -EIO; /* we're long past hope of a successful reset */
 
 	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
-	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
-	drain_delayed_work(&gt->hangcheck.work); /* flush param */
 
 	err = intel_gt_live_subtests(tests, gt);
 
-	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2afc41e43b6e..15c7cc3a23b3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1004,92 +1004,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	return ret;
 }
 
-static void i915_instdone_info(struct drm_i915_private *dev_priv,
-			       struct seq_file *m,
-			       struct intel_instdone *instdone)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
-	int slice;
-	int subslice;
-
-	seq_printf(m, "\t\tINSTDONE: 0x%08x\n",
-		   instdone->instdone);
-
-	if (INTEL_GEN(dev_priv) <= 3)
-		return;
-
-	seq_printf(m, "\t\tSC_INSTDONE: 0x%08x\n",
-		   instdone->slice_common);
-
-	if (INTEL_GEN(dev_priv) <= 6)
-		return;
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
-		seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->sampler[slice][subslice]);
-
-	for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
-		seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
-			   slice, subslice, instdone->row[slice][subslice]);
-}
-
-static int i915_hangcheck_info(struct seq_file *m, void *unused)
-{
-	struct drm_i915_private *i915 = node_to_i915(m->private);
-	struct intel_gt *gt = &i915->gt;
-	struct intel_engine_cs *engine;
-	intel_wakeref_t wakeref;
-	enum intel_engine_id id;
-
-	seq_printf(m, "Reset flags: %lx\n", gt->reset.flags);
-	if (test_bit(I915_WEDGED, &gt->reset.flags))
-		seq_puts(m, "\tWedged\n");
-	if (test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
-		seq_puts(m, "\tDevice (global) reset in progress\n");
-
-	if (!i915_modparams.enable_hangcheck) {
-		seq_puts(m, "Hangcheck disabled\n");
-		return 0;
-	}
-
-	if (timer_pending(&gt->hangcheck.work.timer))
-		seq_printf(m, "Hangcheck active, timer fires in %dms\n",
-			   jiffies_to_msecs(gt->hangcheck.work.timer.expires -
-					    jiffies));
-	else if (delayed_work_pending(&gt->hangcheck.work))
-		seq_puts(m, "Hangcheck active, work pending\n");
-	else
-		seq_puts(m, "Hangcheck inactive\n");
-
-	seq_printf(m, "GT active? %s\n", yesno(gt->awake));
-
-	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		for_each_engine(engine, i915, id) {
-			struct intel_instdone instdone;
-
-			seq_printf(m, "%s: %d ms ago\n",
-				   engine->name,
-				   jiffies_to_msecs(jiffies -
-						    engine->hangcheck.action_timestamp));
-
-			seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
-				   (long long)engine->hangcheck.acthd,
-				   intel_engine_get_active_head(engine));
-
-			intel_engine_get_instdone(engine, &instdone);
-
-			seq_puts(m, "\tinstdone read =\n");
-			i915_instdone_info(i915, m, &instdone);
-
-			seq_puts(m, "\tinstdone accu =\n");
-			i915_instdone_info(i915, m,
-					   &engine->hangcheck.instdone);
-		}
-	}
-
-	return 0;
-}
-
 static int ironlake_drpc_info(struct seq_file *m)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
@@ -4305,7 +4219,6 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
 	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
-	{"i915_hangcheck_info", i915_hangcheck_info, 0},
 	{"i915_drpc_info", i915_drpc_info, 0},
 	{"i915_ring_freq_table", i915_ring_freq_table, 0},
 	{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5323e4fa55d9..949f09bca8ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1598,10 +1598,7 @@ void i915_driver_remove(struct drm_i915_private *i915)
 
 	i915_driver_modeset_remove(i915);
 
-	/* Free error state after interrupts are fully disabled. */
-	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
 	i915_reset_error_state(i915);
-
 	i915_gem_driver_remove(i915);
 
 	intel_power_domains_driver_remove(i915);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cb63b2bd0ce8..79ddb44b834d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2205,7 +2205,6 @@ extern const struct dev_pm_ops i915_pm_ops;
 int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
 void i915_driver_remove(struct drm_i915_private *i915);
 
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a28ee754b7b4..a86d28bda6dd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -534,10 +534,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	}
 	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
-	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
-		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
-		   ee->hangcheck_timestamp,
-		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
@@ -679,11 +675,8 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
-	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
-	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
-		   error->capture,
-		   jiffies_to_msecs(jiffies - error->capture),
-		   jiffies_to_msecs(error->capture - error->epoch));
+	err_printf(m, "Capture: %lu jiffies; %d ms ago\n",
+		   error->capture, jiffies_to_msecs(jiffies - error->capture));
 
 	for (ee = error->engine; ee; ee = ee->next)
 		err_printf(m, "Active process (on ring %s): %s [%d]\n",
@@ -742,7 +735,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache);
 
 	for (ee = error->engine; ee; ee = ee->next)
-		error_print_engine(m, ee, error->epoch);
+		error_print_engine(m, ee, error->capture);
 
 	for (ee = error->engine; ee; ee = ee->next) {
 		const struct drm_i915_error_object *obj;
@@ -770,7 +763,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			for (j = 0; j < ee->num_requests; j++)
 				error_print_request(m, " ",
 						    &ee->requests[j],
-						    error->epoch);
+						    error->capture);
 		}
 
 		print_error_obj(m, ee->engine, "ringbuffer", ee->ringbuffer);
@@ -1144,8 +1137,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	}
 
 	ee->idle = intel_engine_is_idle(engine);
-	if (!ee->idle)
-		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
 	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
 						  engine);
 
@@ -1658,20 +1649,6 @@ static void capture_params(struct i915_gpu_state *error)
 	i915_params_copy(&error->params, &i915_modparams);
 }
 
-static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
-{
-	const struct drm_i915_error_engine *ee;
-	unsigned long epoch = error->capture;
-
-	for (ee = error->engine; ee; ee = ee->next) {
-		if (ee->hangcheck_timestamp &&
-		    time_before(ee->hangcheck_timestamp, epoch))
-			epoch = ee->hangcheck_timestamp;
-	}
-
-	return epoch;
-}
-
 static void capture_finish(struct i915_gpu_state *error)
 {
 	struct i915_ggtt *ggtt = &error->i915->ggtt;
@@ -1723,8 +1700,6 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
 	error->overlay = intel_overlay_capture_error_state(i915);
 	error->display = intel_display_capture_error_state(i915);
 
-	error->epoch = capture_find_epoch(error);
-
 	capture_finish(error);
 	compress_fini(&compress);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 63cf387411e0..caaed5093d95 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -34,7 +34,6 @@ struct i915_gpu_state {
 	ktime_t boottime;
 	ktime_t uptime;
 	unsigned long capture;
-	unsigned long epoch;
 
 	struct drm_i915_private *i915;
 
@@ -86,7 +85,6 @@ struct i915_gpu_state {
 
 		/* Software tracked state */
 		bool idle;
-		unsigned long hangcheck_timestamp;
 		int num_requests;
 		u32 reset_count;
 
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index ae8bb3cb627e..732aad148881 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -16,6 +16,12 @@ enum {
 	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
 	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
 	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+	/* A preemptive pulse used to monitor the health of each engine */
+	I915_PRIORITY_HEARTBEAT,
+
+	/* Interactive workload, scheduled for immediate pageflipping */
+	I915_PRIORITY_DISPLAY,
 };
 
 #define I915_USER_PRIORITY_SHIFT 2
-- 
2.23.0


* [PATCH 22/30] drm/i915: Remove logical HW ID
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (19 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 16:07   ` Tvrtko Ursulin
  2019-10-02 11:19 ` [PATCH 23/30] drm/i915: Move context management under GEM Chris Wilson
                   ` (13 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

With the introduction of ctx->engines[] we allow multiple logical
contexts to be used on the same engine (e.g. with virtual engines).
According to bspec, each logical context requires a unique tag in order
for context-switching to occur correctly between them. [Simple
experiments show that it is not so easy to trick the HW into performing
a lite-restore with matching logical IDs, though my memory from early
Broadwell experiments does suggest that it should be generating
lite-restores.]

We only need to keep a unique tag for the active lifetime of the
context, and for as long as we need to identify that context. The HW
uses the tag to determine if it should use a lite-restore (why not the
LRCA?) and passes the tag back for various status identifiers. The only
status we need to track is for OA, so when using perf, we assign the
specific context a unique tag.

v2: Calculate required number of tags to fill ELSP.
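
The scheme reduces to the sketch below, simplified from the
__execlists_schedule_in() hunk in this patch; the helper name is
hypothetical, while the field layout and NUM_CONTEXT_TAG come from the
diff (GENMASK_ULL is the usual <linux/bits.h> helper):

/*
 * Hypothetical helper mirroring the tag assignment added below: the SW
 * context ID (bits 47:37 of the descriptor) only needs to differ
 * between the contexts concurrently in ELSP, so a small per-engine
 * rolling counter suffices; perf/OA instead pins a fixed non-zero tag
 * so its reports can always be matched to the stream's context.
 */
static u64 assign_sw_ctx_id(u64 desc, unsigned int *context_tag, u32 fixed_tag)
{
	desc &= ~GENMASK_ULL(47, 37);		/* clear SW context ID */
	if (fixed_tag)				/* e.g. an OA stream */
		desc |= (u64)fixed_tag << 32;
	else					/* rolling tag pool */
		desc |= (u64)((*context_tag)++ % NUM_CONTEXT_TAG) << 37;
	return desc;
}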

Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 147 ------------------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
 .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |   8 -
 drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  32 ++--
 drivers/gpu/drm/i915/gvt/kvmgt.c              |  17 --
 drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |   7 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
 drivers/gpu/drm/i915/i915_perf.c              |  25 ++-
 drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
 16 files changed, 51 insertions(+), 284 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e832238a72e5..4e59b809d901 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -168,97 +168,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
 	return i915_gem_context_get_engine(ctx, idx);
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
-{
-	unsigned int max;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	if (INTEL_GEN(i915) >= 12)
-		max = GEN12_MAX_CONTEXT_HW_ID;
-	else if (INTEL_GEN(i915) >= 11)
-		max = GEN11_MAX_CONTEXT_HW_ID;
-	else if (USES_GUC_SUBMISSION(i915))
-		/*
-		 * When using GuC in proxy submission, GuC consumes the
-		 * highest bit in the context id to indicate proxy submission.
-		 */
-		max = MAX_GUC_CONTEXT_HW_ID;
-	else
-		max = MAX_CONTEXT_HW_ID;
-
-	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-	struct i915_gem_context *ctx, *cn;
-	LIST_HEAD(pinned);
-	int id = -ENOSPC;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	list_for_each_entry_safe(ctx, cn,
-				 &i915->contexts.hw_id_list, hw_id_link) {
-		if (atomic_read(&ctx->hw_id_pin_count)) {
-			list_move_tail(&ctx->hw_id_link, &pinned);
-			continue;
-		}
-
-		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-		list_del_init(&ctx->hw_id_link);
-		id = ctx->hw_id;
-		break;
-	}
-
-	/*
-	 * Remember how far we got up on the last repossesion scan, so the
-	 * list is kept in a "least recently scanned" order.
-	 */
-	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-	return id;
-}
-
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
-{
-	int ret;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	/*
-	 * We prefer to steal/stall ourselves and our users over that of the
-	 * entire system. That may be a little unfair to our users, and
-	 * even hurt high priority clients. The choice is whether to oomkill
-	 * something else, or steal a context id.
-	 */
-	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	if (unlikely(ret < 0)) {
-		ret = steal_hw_id(i915);
-		if (ret < 0) /* once again for the correct errno code */
-			ret = new_hw_id(i915, GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
-
-	*out = ret;
-	return 0;
-}
-
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-
-	if (list_empty(&ctx->hw_id_link))
-		return;
-
-	mutex_lock(&i915->contexts.mutex);
-	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-		list_del_init(&ctx->hw_id_link);
-	}
-	mutex_unlock(&i915->contexts.mutex);
-}
-
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
 {
 	while (count--) {
@@ -313,8 +222,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	release_hw_id(ctx);
-
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
@@ -459,12 +366,6 @@ static void context_close(struct i915_gem_context *ctx)
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 
-	/*
-	 * This context will never again be assinged to HW, so we can
-	 * reuse its ID for the next context.
-	 */
-	release_hw_id(ctx);
-
 	/*
 	 * The LUT uses the VMA as a backpointer to unref the object,
 	 * so we need to clear the LUT before we close all the VMA (inside
@@ -507,7 +408,6 @@ __create_context(struct drm_i915_private *i915)
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
@@ -676,18 +576,11 @@ struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;
-	int err;
 
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
 	i915_gem_context_clear_bannable(ctx);
 	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
@@ -727,15 +620,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	/*
-	 * For easy recognisablity, we want the kernel context to be 0 and then
-	 * all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
-	 */
-	GEM_BUG_ON(ctx->hw_id);
-	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
 	dev_priv->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
@@ -749,10 +633,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	destroy_kernel_context(&i915->kernel_context);
-
-	/* Must free all deferred contexts (via flush_workqueue) first */
-	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
-	ida_destroy(&i915->contexts.hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
@@ -2438,33 +2318,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int err = 0;
-
-	mutex_lock(&i915->contexts.mutex);
-
-	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
-	if (list_empty(&ctx->hw_id_link)) {
-		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
-		err = assign_hw_id(i915, &ctx->hw_id);
-		if (err)
-			goto out_unlock;
-
-		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
-	}
-
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
-	atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
-	mutex_unlock(&i915->contexts.mutex);
-	return err;
-}
-
 /* GEM context-engines iterator: for_each_gem_engine() */
 struct intel_context *
 i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index e0f5b6c6a331..1b0df53436cf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -127,21 +127,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
 	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
-		return 0;
-
-	return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
-	atomic_dec(&ctx->hw_id_pin_count);
-}
-
 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 {
 	return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index daf1ea5075a6..6419da7c9f90 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -148,24 +148,6 @@ struct i915_gem_context {
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 #define CONTEXT_USER_ENGINES		3
 
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
 	struct mutex mutex;
 
 	struct i915_sched_attr sched;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 2288757808ae..2fb31ada2fa7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg)
 
 			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->vm), err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
@@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg)
 
 			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->vm), err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
@@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg)
 
 			err = gpu_fill(ce, obj, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       ce->engine->name, ctx->hw_id,
-				       yesno(!!ctx->vm), err);
+				       ce->engine->name, yesno(!!ctx->vm), err);
 				i915_gem_context_unlock_engines(ctx);
 				goto out_unlock;
 			}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 0d1a275ec316..ebc46f098561 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
 {
 	struct i915_gem_context *ctx;
 	struct i915_gem_engines *e;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -32,13 +31,8 @@ mock_context(struct drm_i915_private *i915,
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 	mutex_init(&ctx->mutex);
 
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_engines;
-
 	if (name) {
 		struct i915_ppgtt *ppgtt;
 
@@ -56,8 +50,6 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_engines:
-	free_engines(rcu_access_pointer(ctx->engines));
 err_free:
 	kfree(ctx);
 	return NULL;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..6959b05ae5f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+	u32 tag; /* cookie passed to HW to track this context on submission */
 
 	unsigned int active_count; /* protected by timeline->mutex */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 25af66540a1b..ad3be2fbd71a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -301,10 +301,12 @@ struct intel_engine_cs {
 	u8 uabi_class;
 	u8 uabi_instance;
 
+	u32 uabi_capabilities;
 	u32 context_size;
 	u32 mmio_base;
 
-	u32 uabi_capabilities;
+	unsigned int context_tag;
+#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
 
 	struct rb_node uabi_node;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6dcceaf02e00..890cd9ab07a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -443,12 +443,8 @@ assert_priority_queue(const struct i915_request *prev,
 static u64
 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
 	desc = INTEL_LEGACY_32B_CONTEXT;
 	if (i915_vm_is_4lvl(ce->vm))
 		desc = INTEL_LEGACY_64B_CONTEXT;
@@ -466,20 +462,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	 * anything below.
 	 */
 	if (INTEL_GEN(engine->i915) >= 11) {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-								/* bits 37-47 */
-
 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 								/* bits 48-53 */
 
-		/* TODO: decide what to do with SW counter (bits 55-60) */
-
 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 								/* bits 61-63 */
-	} else {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
 	return desc;
@@ -988,6 +975,18 @@ __execlists_schedule_in(struct i915_request *rq)
 
 	intel_context_get(ce);
 
+	if (ce->tag) {
+		/* Use a fixed tag for OA and friends */
+		ce->lrc_desc |= (u64)ce->tag << 32;
+	} else {
+		/* We don't need a strict matching tag, just different values */
+		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+		ce->lrc_desc |=
+			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+			GEN11_SW_CTX_ID_SHIFT;
+		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+	}
+
 	intel_gt_pm_get(engine->gt);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
@@ -2235,7 +2234,6 @@ static void execlists_context_unpin(struct intel_context *ce)
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
 		      ce->engine);
 
-	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
 	intel_ring_reset(ce->ring, ce->ring->tail);
 }
@@ -2285,18 +2283,12 @@ __execlists_context_pin(struct intel_context *ce,
 		goto unpin_active;
 	}
 
-	ret = i915_gem_context_pin_hw_id(ce->gem_context);
-	if (ret)
-		goto unpin_map;
-
 	ce->lrc_desc = lrc_descriptor(ce, engine);
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
 
-unpin_map:
-	i915_gem_object_unpin_map(ce->state->obj);
 unpin_active:
 	intel_context_active_release(ce);
 err:
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 343d79c1cb7e..04a5a0d90823 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "\n");
 }
 
-static ssize_t
-hw_id_show(struct device *dev, struct device_attribute *attr,
-	   char *buf)
-{
-	struct mdev_device *mdev = mdev_from_dev(dev);
-
-	if (mdev) {
-		struct intel_vgpu *vgpu = (struct intel_vgpu *)
-			mdev_get_drvdata(mdev);
-		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow[0]->gem_context->hw_id);
-	}
-	return sprintf(buf, "\n");
-}
-
 static DEVICE_ATTR_RO(vgpu_id);
-static DEVICE_ATTR_RO(hw_id);
 
 static struct attribute *intel_vgpu_attrs[] = {
 	&dev_attr_vgpu_id.attr,
-	&dev_attr_hw_id.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 15c7cc3a23b3..454de27c4912 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1507,9 +1507,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
-		if (!list_empty(&ctx->hw_id_link))
-			seq_printf(m, "%x [pin %u]", ctx->hw_id,
-				   atomic_read(&ctx->hw_id_pin_count));
 		if (ctx->pid) {
 			struct task_struct *task;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a86d28bda6dd..47239df653f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
-		   header, ctx->comm, ctx->pid, ctx->hw_id,
-		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
+	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
+		   ctx->guilty, ctx->active);
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -1262,7 +1262,6 @@ static bool record_context(struct drm_i915_error_context *e,
 		rcu_read_unlock();
 	}
 
-	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index caaed5093d95..4dc36d6ee3a2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -117,7 +117,6 @@ struct i915_gpu_state {
 		struct drm_i915_error_context {
 			char comm[TASK_COMM_LEN];
 			pid_t pid;
-			u32 hw_id;
 			int active;
 			int guilty;
 			struct i915_sched_attr sched_attr;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 80055501eccb..ecfbc37b738b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		} else {
 			stream->specific_ctx_id_mask =
 				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			stream->specific_ctx_id &=
-				stream->specific_ctx_id_mask;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		}
 		break;
 
 	case 11:
 	case 12: {
 		stream->specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		stream->specific_ctx_id &=
-			stream->specific_ctx_id_mask;
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		break;
 	}
 
@@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		MISSING_CASE(INTEL_GEN(i915));
 	}
 
+	ce->tag = stream->specific_ctx_id_mask;
+
 	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
 			 stream->specific_ctx_id,
 			 stream->specific_ctx_id_mask);
@@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct intel_context *ce;
 
-	stream->specific_ctx_id = INVALID_CTX_ID;
-	stream->specific_ctx_id_mask = 0;
-
 	ce = fetch_and_zero(&stream->pinned_ctx);
-	if (ce)
+	if (ce) {
+		ce->tag = 0; /* recomputed on next submission after parking */
 		intel_context_unpin(ce);
+	}
+
+	stream->specific_ctx_id = INVALID_CTX_ID;
+	stream->specific_ctx_id_mask = 0;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 24f2944da09d..1f2cf6cfafb5 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->flags)
+		      __entry->ctx, __entry->seqno, __entry->flags)
 );
 
 DECLARE_EVENT_CLASS(i915_request,
@@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno)
+		      __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->prio, __entry->port)
 );
 
@@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
-			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->completed)
+			      __entry->ctx, __entry->seqno, __entry->completed)
 );
 
 #else
@@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->flags)
 );
 
@@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_STRUCT__entry(
 			__field(u32, dev)
 			__field(struct i915_gem_context *, ctx)
-			__field(u32, hw_id)
 			__field(struct i915_address_space *, vm)
 	),
 
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->hw_id = ctx->hw_id;
 			__entry->vm = ctx->vm;
 	),
 
-	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
-		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
+	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
+		  __entry->dev, __entry->ctx, __entry->vm)
 )
 
 DEFINE_EVENT(i915_context, i915_context_create,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 52d2df843148..f39f0282e78c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg)
 			if (IS_ERR(rq)) {
 				/* When full, fail_if_busy will trigger EBUSY */
 				if (PTR_ERR(rq) != -EBUSY) {
-					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
-					       ctx->hw_id, engine->name,
+					pr_err("Unexpected error from request alloc (on %s): %d\n",
+					       engine->name,
 					       (int)PTR_ERR(rq));
 					err = PTR_ERR(rq);
 				}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 1c9db08f7c28..ac1ff558eb90 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
+		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
-- 
2.23.0


* [PATCH 23/30] drm/i915: Move context management under GEM
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (20 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard Chris Wilson
                   ` (12 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Keep track of the GEM contexts underneath i915->gem.contexts and give
them their own spinlock for list management, so that creating and
closing contexts no longer has to be serialised by struct_mutex.
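
A condensed sketch of the scheme (struct and locking as in the diff
below; publish_context is an illustrative name, not a helper this
patch adds):

	struct i915_gem_contexts {
		spinlock_t lock; /* locks the list of user contexts */
		struct list_head list;

		struct llist_head free_list; /* closed ctx awaiting free */
		struct work_struct free_work;
	};

	static void publish_context(struct drm_i915_private *i915,
				    struct i915_gem_context *ctx)
	{
		spin_lock(&i915->gem.contexts.lock);
		list_add_tail(&ctx->link, &i915->gem.contexts.list);
		spin_unlock(&i915->gem.contexts.lock);
	}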

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 155 +++++++-----------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |   4 +-
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   2 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   2 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  12 +-
 .../drm/i915/gem/selftests/i915_gem_context.c | 143 +++++++---------
 .../gpu/drm/i915/gem/selftests/mock_context.c |   4 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   8 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   5 +
 drivers/gpu/drm/i915/gt/selftest_context.c    |  24 +--
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  |  19 +--
 drivers/gpu/drm/i915/gt/selftest_lrc.c        |   4 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  10 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |  20 +--
 drivers/gpu/drm/i915/i915_debugfs.c           |  50 +++---
 drivers/gpu/drm/i915/i915_drv.c               |   2 -
 drivers/gpu/drm/i915/i915_drv.h               |  27 +--
 drivers/gpu/drm/i915/i915_gem.c               |  10 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   4 +-
 drivers/gpu/drm/i915/i915_perf.c              |  24 ++-
 drivers/gpu/drm/i915/i915_sysfs.c             |  77 ++++-----
 drivers/gpu/drm/i915/i915_trace.h             |   2 +-
 drivers/gpu/drm/i915/selftests/i915_gem.c     |   8 -
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   3 -
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   6 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  11 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   5 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   6 +-
 29 files changed, 286 insertions(+), 363 deletions(-)
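
Several hunks below (i915_debugfs.c, i915_perf.c) walk the context
list with the same idiom: pin the context with a reference under the
new spinlock, drop the lock for the loop body, then revalidate the
cursor before continuing. Condensed for review (illustrative, not a
literal hunk):

	struct i915_gem_context *ctx, *cn;

	spin_lock(&i915->gem.contexts.lock);
	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
		if (!kref_get_unless_zero(&ctx->ref))
			continue; /* already being freed, skip it */

		spin_unlock(&i915->gem.contexts.lock);

		/* ... use ctx without holding the list lock ... */

		spin_lock(&i915->gem.contexts.lock);
		list_safe_reset_next(ctx, cn, link); /* next may be stale */
		i915_gem_context_put(ctx);
	}
	spin_unlock(&i915->gem.contexts.lock);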

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4e59b809d901..a77f439358d7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -219,9 +219,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
+	spin_lock(&ctx->i915->gem.contexts.lock);
+	list_del(&ctx->link);
+	spin_unlock(&ctx->i915->gem.contexts.lock);
+
 	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
@@ -231,56 +234,40 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
-	list_del(&ctx->link);
 	mutex_destroy(&ctx->mutex);
 
 	kfree_rcu(ctx, rcu);
 }
 
-static void contexts_free(struct drm_i915_private *i915)
+static void contexts_free_all(struct llist_node *list)
 {
-	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
 	struct i915_gem_context *ctx, *cn;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	llist_for_each_entry_safe(ctx, cn, freed, free_link)
+	llist_for_each_entry_safe(ctx, cn, list, free_link)
 		i915_gem_context_free(ctx);
 }
 
-static void contexts_free_first(struct drm_i915_private *i915)
+static void contexts_flush_free(struct i915_gem_contexts *gc)
 {
-	struct i915_gem_context *ctx;
-	struct llist_node *freed;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	freed = llist_del_first(&i915->contexts.free_list);
-	if (!freed)
-		return;
-
-	ctx = container_of(freed, typeof(*ctx), free_link);
-	i915_gem_context_free(ctx);
+	contexts_free_all(llist_del_all(&gc->free_list));
 }
 
 static void contexts_free_worker(struct work_struct *work)
 {
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), contexts.free_work);
+	struct i915_gem_contexts *gc =
+		container_of(work, typeof(*gc), free_work);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	contexts_free(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	contexts_flush_free(gc);
 }
 
 void i915_gem_context_release(struct kref *ref)
 {
 	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
-	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_gem_contexts *gc = &ctx->i915->gem.contexts;
 
 	trace_i915_context_free(ctx);
-	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
-		queue_work(i915->wq, &i915->contexts.free_work);
+	if (llist_add(&ctx->free_link, &gc->free_list))
+		queue_work(ctx->i915->wq, &gc->free_work);
 }
 
 static inline struct i915_gem_engines *
@@ -359,8 +346,8 @@ static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
 
-	if (ctx->vm)
-		i915_vm_close(ctx->vm);
+	if (rcu_access_pointer(ctx->vm))
+		i915_vm_close(rcu_dereference_protected(ctx->vm, true));
 
 	mutex_lock(&ctx->mutex);
 
@@ -394,7 +381,6 @@ __create_context(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ctx->ref);
-	list_add_tail(&ctx->link, &i915->contexts.list);
 	ctx->i915 = i915;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
@@ -435,6 +421,10 @@ __create_context(struct drm_i915_private *i915)
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
+	spin_lock(&i915->gem.contexts.lock);
+	list_add_tail(&ctx->link, &i915->gem.contexts.list);
+	spin_unlock(&i915->gem.contexts.lock);
+
 	return ctx;
 
 err_free:
@@ -464,11 +454,11 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm)
 static struct i915_address_space *
 __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 {
-	struct i915_address_space *old = ctx->vm;
+	struct i915_address_space *old = rcu_dereference_protected(ctx->vm, 1);
 
 	GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old));
 
-	ctx->vm = i915_vm_open(vm);
+	rcu_assign_pointer(ctx->vm, i915_vm_open(vm));
 	context_apply_all(ctx, __apply_ppgtt, vm);
 
 	return old;
@@ -477,7 +467,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 static void __assign_ppgtt(struct i915_gem_context *ctx,
 			   struct i915_address_space *vm)
 {
-	if (vm == ctx->vm)
+	if (vm == rcu_access_pointer(ctx->vm))
 		return;
 
 	vm = __set_ppgtt(ctx, vm);
@@ -509,27 +499,25 @@ static void __assign_timeline(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
-	    !HAS_EXECLISTS(dev_priv))
+	    !HAS_EXECLISTS(i915))
 		return ERR_PTR(-EINVAL);
 
-	/* Reap the most stale context */
-	contexts_free_first(dev_priv);
+	/* Reap the stale contexts */
+	contexts_flush_free(&i915->gem.contexts);
 
-	ctx = __create_context(dev_priv);
+	ctx = __create_context(i915);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (HAS_FULL_PPGTT(dev_priv)) {
+	if (HAS_FULL_PPGTT(i915)) {
 		struct i915_ppgtt *ppgtt;
 
-		ppgtt = i915_ppgtt_create(dev_priv);
+		ppgtt = i915_ppgtt_create(i915);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
@@ -544,7 +532,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
 		struct intel_timeline *timeline;
 
-		timeline = intel_timeline_create(&dev_priv->gt, NULL);
+		timeline = intel_timeline_create(&i915->gt, NULL);
 		if (IS_ERR(timeline)) {
 			context_close(ctx);
 			return ERR_CAST(timeline);
@@ -590,48 +578,40 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	return ctx;
 }
 
-static void init_contexts(struct drm_i915_private *i915)
+static void init_contexts(struct i915_gem_contexts *gc)
 {
-	mutex_init(&i915->contexts.mutex);
-	INIT_LIST_HEAD(&i915->contexts.list);
-
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
+	spin_lock_init(&gc->lock);
+	INIT_LIST_HEAD(&gc->list);
 
-	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
-	init_llist_head(&i915->contexts.free_list);
+	INIT_WORK(&gc->free_work, contexts_free_worker);
+	init_llist_head(&gc->free_list);
 }
 
-int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
+int i915_gem_init_contexts(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx;
 
 	/* Reassure ourselves we are only called once */
-	GEM_BUG_ON(dev_priv->kernel_context);
+	GEM_BUG_ON(i915->kernel_context);
 
-	init_contexts(dev_priv);
+	init_contexts(&i915->gem.contexts);
 
 	/* lowest priority; idle task */
-	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
+	ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	dev_priv->kernel_context = ctx;
+	i915->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			 DRIVER_CAPS(dev_priv)->has_logical_contexts ?
+			 DRIVER_CAPS(i915)->has_logical_contexts ?
 			 "logical" : "fake");
 	return 0;
 }
 
-void i915_gem_contexts_fini(struct drm_i915_private *i915)
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	destroy_kernel_context(&i915->kernel_context);
 }
 
@@ -653,8 +633,8 @@ static int gem_context_register(struct i915_gem_context *ctx,
 	int ret;
 
 	ctx->file_priv = fpriv;
-	if (ctx->vm)
-		ctx->vm->file = fpriv;
+	if (rcu_access_pointer(ctx->vm))
+		rcu_dereference_protected(ctx->vm, true)->file = fpriv;
 
 	ctx->pid = get_task_pid(current, PIDTYPE_PID);
 	ctx->name = kasprintf(GFP_KERNEL, "%s[%d]",
@@ -691,9 +671,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init(&file_priv->context_idr);
 	idr_init_base(&file_priv->vm_idr, 1);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err;
@@ -721,6 +699,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private *i915 = file_priv->dev_priv;
 
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
@@ -729,6 +708,8 @@ void i915_gem_context_close(struct drm_file *file)
 	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
 	idr_destroy(&file_priv->vm_idr);
 	mutex_destroy(&file_priv->vm_idr_lock);
+
+	contexts_flush_free(&i915->gem.contexts);
 }
 
 int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -907,16 +888,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	struct i915_address_space *vm;
 	int ret;
 
-	if (!ctx->vm)
+	if (!rcu_access_pointer(ctx->vm))
 		return -ENODEV;
 
-	/* XXX rcu acquire? */
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
+	rcu_read_lock();
 	vm = i915_vm_get(ctx->vm);
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	rcu_read_unlock();
 
 	ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
 	if (ret)
@@ -1025,7 +1002,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	if (args->size)
 		return -EINVAL;
 
-	if (!ctx->vm)
+	if (!rcu_access_pointer(ctx->vm))
 		return -ENODEV;
 
 	if (upper_32_bits(args->value))
@@ -1039,17 +1016,15 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	if (!vm)
 		return -ENOENT;
 
-	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	err = mutex_lock_interruptible(&ctx->mutex);
 	if (err)
 		goto out;
 
-	if (vm == ctx->vm)
+	if (vm == rcu_access_pointer(ctx->vm))
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
-	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
-	mutex_unlock(&ctx->mutex);
 
 	old = __set_ppgtt(ctx, vm);
 
@@ -1069,8 +1044,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	}
 
 unlock:
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
-
+	mutex_unlock(&ctx->mutex);
 out:
 	i915_vm_put(vm);
 	return err;
@@ -1953,7 +1927,7 @@ static int clone_vm(struct i915_gem_context *dst,
 
 	rcu_read_lock();
 	do {
-		vm = READ_ONCE(src->vm);
+		vm = rcu_dereference(src->vm);
 		if (!vm)
 			break;
 
@@ -1975,7 +1949,7 @@ static int clone_vm(struct i915_gem_context *dst,
 		 * it cannot be reallocated elsewhere.
 		 */
 
-		if (vm == READ_ONCE(src->vm))
+		if (vm == rcu_access_pointer(src->vm))
 			break;
 
 		i915_vm_put(vm);
@@ -2077,12 +2051,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	ext_data.ctx = i915_gem_create_context(i915, args->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ext_data.ctx))
 		return PTR_ERR(ext_data.ctx);
 
@@ -2209,10 +2178,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 
 	case I915_CONTEXT_PARAM_GTT_SIZE:
 		args->size = 0;
-		if (ctx->vm)
-			args->value = ctx->vm->total;
+		rcu_read_lock();
+		if (rcu_access_pointer(ctx->vm))
+			args->value = rcu_dereference(ctx->vm)->total;
 		else
 			args->value = to_i915(dev)->ggtt.vm.total;
+		rcu_read_unlock();
 		break;
 
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
@@ -2283,7 +2254,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 				       void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_reset_stats *args = data;
 	struct i915_gem_context *ctx;
 	int ret;
@@ -2305,7 +2276,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+		args->reset_count = i915_reset_count(&i915->gpu_error);
 	else
 		args->reset_count = 0;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 1b0df53436cf..4ee5dfc5794e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -133,8 +133,8 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 }
 
 /* i915_gem_context.c */
-int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
+int __must_check i915_gem_init_contexts(struct drm_i915_private *i915);
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 6419da7c9f90..a3ecd19f2303 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -88,7 +88,7 @@ struct i915_gem_context {
 	 * In other modes, this is a NULL pointer with the expectation that
 	 * the caller uses the shared global GTT.
 	 */
-	struct i915_address_space *vm;
+	struct i915_address_space __rcu *vm;
 
 	/**
 	 * @pid: process id of creator
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 88a881be12ec..98816c35ffc3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -728,7 +728,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
 		return -ENOENT;
 
 	eb->gem_context = ctx;
-	if (ctx->vm)
+	if (rcu_access_pointer(ctx->vm))
 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
 	eb->context_flags = 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 1738a15eb911..6d30955cb020 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -758,7 +758,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		 * On almost all of the older hw, we cannot tell the GPU that
 		 * a page is readonly.
 		 */
-		vm = dev_priv->kernel_context->vm;
+		vm = rcu_dereference_protected(dev_priv->kernel_context->vm, true);
 		if (!vm || !vm->has_read_only)
 			return -ENODEV;
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 98b2a6ccfcc1..e204e653b459 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1322,7 +1322,7 @@ static int igt_ppgtt_pin_update(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *dev_priv = ctx->i915;
 	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
-	struct i915_address_space *vm = ctx->vm;
+	struct i915_address_space *vm = rcu_dereference_protected(ctx->vm, 1);
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1460,7 +1460,8 @@ static int igt_tmpfs_fallback(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
 	struct vfsmount *gemfs = i915->mm.gemfs;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(ctx->vm, true) ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	u32 *vaddr;
@@ -1517,7 +1518,8 @@ static int igt_shrink_thp(void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(ctx->vm, true) ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1699,8 +1701,8 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 		goto out_unlock;
 	}
 
-	if (ctx->vm)
-		ctx->vm->scrub_64K = true;
+	if (rcu_access_pointer(ctx->vm))
+		rcu_dereference_protected(ctx->vm, true)->scrub_64K = true;
 
 	err = i915_subtests(tests, ctx);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 2fb31ada2fa7..ffedc67f5c3d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -53,19 +53,17 @@ static int live_nop_switch(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
 		err = -ENOMEM;
-		goto out_unlock;
+		goto out_file;
 	}
 
 	for (n = 0; n < nctx; n++) {
 		ctx[n] = live_context(i915, file);
 		if (IS_ERR(ctx[n])) {
 			err = PTR_ERR(ctx[n]);
-			goto out_unlock;
+			goto out_file;
 		}
 	}
 
@@ -79,7 +77,7 @@ static int live_nop_switch(void *arg)
 			rq = igt_request_alloc(ctx[n], engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
-				goto out_unlock;
+				goto out_file;
 			}
 			i915_request_add(rq);
 		}
@@ -87,7 +85,7 @@ static int live_nop_switch(void *arg)
 			pr_err("Failed to populate %d contexts\n", nctx);
 			intel_gt_set_wedged(&i915->gt);
 			err = -EIO;
-			goto out_unlock;
+			goto out_file;
 		}
 
 		times[1] = ktime_get_raw();
@@ -97,7 +95,7 @@ static int live_nop_switch(void *arg)
 
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		end_time = jiffies + i915_selftest.timeout_jiffies;
 		for_each_prime_number_from(prime, 2, 8192) {
@@ -107,7 +105,7 @@ static int live_nop_switch(void *arg)
 				rq = igt_request_alloc(ctx[n % nctx], engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
-					goto out_unlock;
+					goto out_file;
 				}
 
 				/*
@@ -143,7 +141,7 @@ static int live_nop_switch(void *arg)
 
 		err = igt_live_test_end(&t);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
 			engine->name,
@@ -151,8 +149,7 @@ static int live_nop_switch(void *arg)
 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
+out_file:
 	mock_file_free(i915, file);
 	return err;
 }
@@ -253,12 +250,10 @@ static int live_parallel_switch(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_locked;
+		goto out_file;
 	}
 
 	engines = i915_gem_context_lock_engines(ctx);
@@ -268,7 +263,7 @@ static int live_parallel_switch(void *arg)
 	if (!data) {
 		i915_gem_context_unlock_engines(ctx);
 		err = -ENOMEM;
-		goto out_locked;
+		goto out;
 	}
 
 	m = 0; /* Use the first context as our template for the engines */
@@ -276,7 +271,7 @@ static int live_parallel_switch(void *arg)
 		err = intel_context_pin(ce);
 		if (err) {
 			i915_gem_context_unlock_engines(ctx);
-			goto out_locked;
+			goto out;
 		}
 		data[m++].ce[0] = intel_context_get(ce);
 	}
@@ -287,7 +282,7 @@ static int live_parallel_switch(void *arg)
 		ctx = live_context(i915, file);
 		if (IS_ERR(ctx)) {
 			err = PTR_ERR(ctx);
-			goto out_locked;
+			goto out;
 		}
 
 		for (m = 0; m < count; m++) {
@@ -296,20 +291,18 @@ static int live_parallel_switch(void *arg)
 
 			ce = intel_context_create(ctx, data[m].ce[0]->engine);
 			if (IS_ERR(ce))
-				goto out_locked;
+				goto out;
 
 			err = intel_context_pin(ce);
 			if (err) {
 				intel_context_put(ce);
-				goto out_locked;
+				goto out;
 			}
 
 			data[m].ce[n] = ce;
 		}
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	for (fn = func; !err && *fn; fn++) {
 		struct igt_live_test t;
 		int n;
@@ -354,8 +347,7 @@ static int live_parallel_switch(void *arg)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
-out_locked:
+out:
 	for (n = 0; n < count; n++) {
 		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
 			if (!data[n].ce[m])
@@ -365,8 +357,8 @@ static int live_parallel_switch(void *arg)
 			intel_context_put(data[n].ce[m]);
 		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(data);
+out_file:
 	mock_file_free(i915, file);
 	return err;
 }
@@ -626,11 +618,9 @@ static int igt_ctx_exec(void *arg)
 		if (IS_ERR(file))
 			return PTR_ERR(file);
 
-		mutex_lock(&i915->drm.struct_mutex);
-
 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
-			goto out_unlock;
+			goto out_file;
 
 		ncontexts = 0;
 		ndwords = 0;
@@ -642,7 +632,7 @@ static int igt_ctx_exec(void *arg)
 			ctx = kernel_context(i915);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
@@ -654,7 +644,7 @@ static int igt_ctx_exec(void *arg)
 					err = PTR_ERR(obj);
 					intel_context_put(ce);
 					kernel_context_close(ctx);
-					goto out_unlock;
+					goto out_file;
 				}
 			}
 
@@ -663,17 +653,18 @@ static int igt_ctx_exec(void *arg)
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name,
-				       yesno(!!ctx->vm), err);
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			err = throttle(ce, tq, ARRAY_SIZE(tq));
 			if (err) {
 				intel_context_put(ce);
 				kernel_context_close(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			if (++dw == max_dwords(obj)) {
@@ -703,11 +694,10 @@ static int igt_ctx_exec(void *arg)
 			dw += rem;
 		}
 
-out_unlock:
+out_file:
 		throttle_release(tq, ARRAY_SIZE(tq));
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 
 		mock_file_free(i915, file);
 		if (err)
@@ -742,22 +732,20 @@ static int igt_shared_ctx_exec(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	parent = live_context(i915, file);
 	if (IS_ERR(parent)) {
 		err = PTR_ERR(parent);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
 		err = 0;
-		goto out_unlock;
+		goto out_file;
 	}
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	for_each_engine(engine, i915, id) {
 		unsigned long ncontexts, ndwords, dw;
@@ -801,7 +789,8 @@ static int igt_shared_ctx_exec(void *arg)
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name,
-				       yesno(!!ctx->vm), err);
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
 				intel_context_put(ce);
 				kernel_context_close(ctx);
 				goto out_test;
@@ -840,17 +829,13 @@ static int igt_shared_ctx_exec(void *arg)
 			dw += rem;
 		}
 
-		mutex_unlock(&i915->drm.struct_mutex);
 		i915_gem_drain_freed_objects(i915);
-		mutex_lock(&i915->drm.struct_mutex);
 	}
 out_test:
 	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
+out_file:
 	mock_file_free(i915, file);
 	return err;
 }
@@ -1222,8 +1207,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	if (flags & TEST_RESET)
 		igt_global_reset_lock(&i915->gt);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
@@ -1278,8 +1261,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	i915_gem_object_put(obj);
 
 out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	if (flags & TEST_RESET)
 		igt_global_reset_unlock(&i915->gt);
 
@@ -1339,23 +1320,24 @@ static int igt_ctx_readonly(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_unlock;
+		goto out_file;
 	}
 
-	vm = ctx->vm ?: &i915->ggtt.alias->vm;
+	rcu_read_lock();
+	vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm;
 	if (!vm || !vm->has_read_only) {
+		rcu_read_unlock();
 		err = 0;
-		goto out_unlock;
+		goto out_file;
 	}
+	rcu_read_unlock();
 
 	ndwords = 0;
 	dw = 0;
@@ -1373,7 +1355,7 @@ static int igt_ctx_readonly(void *arg)
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
 					i915_gem_context_unlock_engines(ctx);
-					goto out_unlock;
+					goto out_file;
 				}
 
 				if (prandom_u32_state(&prng) & 1)
@@ -1384,15 +1366,17 @@ static int igt_ctx_readonly(void *arg)
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       ce->engine->name, yesno(!!ctx->vm), err);
+				       ce->engine->name,
+				       yesno(!!rcu_access_pointer(ctx->vm)),
+				       err);
 				i915_gem_context_unlock_engines(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			err = throttle(ce, tq, ARRAY_SIZE(tq));
 			if (err) {
 				i915_gem_context_unlock_engines(ctx);
-				goto out_unlock;
+				goto out_file;
 			}
 
 			if (++dw == max_dwords(obj)) {
@@ -1424,11 +1408,10 @@ static int igt_ctx_readonly(void *arg)
 		dw += rem;
 	}
 
-out_unlock:
+out_file:
 	throttle_release(tq, ARRAY_SIZE(tq));
 	if (igt_live_test_end(&t))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	mock_file_free(i915, file);
 	return err;
@@ -1437,7 +1420,7 @@ static int igt_ctx_readonly(void *arg)
 static int check_scratch(struct i915_gem_context *ctx, u64 offset)
 {
 	struct drm_mm_node *node =
-		__drm_mm_interval_first(&ctx->vm->mm,
+		__drm_mm_interval_first(&rcu_dereference_protected(ctx->vm, true)->mm,
 					offset, offset + sizeof(u32) - 1);
 	if (!node || node->start > offset)
 		return 0;
@@ -1487,7 +1470,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 
 	intel_gt_chipset_flush(engine->gt);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj,
+				rcu_dereference_protected(ctx->vm, true),
+				NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -1586,7 +1571,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
 	intel_gt_chipset_flush(engine->gt);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj,
+				rcu_dereference_protected(ctx->vm, true),
+				NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -1677,27 +1664,25 @@ static int igt_vm_isolation(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
-		goto out_unlock;
+		goto out_file;
 
 	ctx_a = live_context(i915, file);
 	if (IS_ERR(ctx_a)) {
 		err = PTR_ERR(ctx_a);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	ctx_b = live_context(i915, file);
 	if (IS_ERR(ctx_b)) {
 		err = PTR_ERR(ctx_b);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	/* We can only test vm isolation, if the vm are distinct */
 	if (ctx_a->vm == ctx_b->vm)
-		goto out_unlock;
+		goto out_file;
 
 	vm_total = ctx_a->vm->total;
 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
@@ -1726,7 +1711,7 @@ static int igt_vm_isolation(void *arg)
 				err = read_from_scratch(ctx_b, engine,
 							offset, &value);
 			if (err)
-				goto out_unlock;
+				goto out_file;
 
 			if (value) {
 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
@@ -1735,7 +1720,7 @@ static int igt_vm_isolation(void *arg)
 				       lower_32_bits(offset),
 				       this);
 				err = -EINVAL;
-				goto out_unlock;
+				goto out_file;
 			}
 
 			this++;
@@ -1745,11 +1730,9 @@ static int igt_vm_isolation(void *arg)
 	pr_info("Checked %lu scratch offsets across %d engines\n",
 		count, RUNTIME_INFO(i915)->num_engines);
 
-out_unlock:
+out_file:
 	if (igt_live_test_end(&t))
 		err = -EIO;
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	mock_file_free(i915, file);
 	return err;
 }
@@ -1781,13 +1764,9 @@ static int mock_context_barrier(void *arg)
 	 * a request; useful for retiring old state after loading new.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = mock_context(i915, "mock");
-	if (!ctx) {
-		err = -ENOMEM;
-		goto unlock;
-	}
+	if (!ctx)
+		return -ENOMEM;
 
 	counter = 0;
 	err = context_barrier_task(ctx, 0,
@@ -1860,8 +1839,6 @@ static int mock_context_barrier(void *arg)
 
 out:
 	mock_context_close(ctx);
-unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 #undef pr_fmt
 #define pr_fmt(x) x
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index ebc46f098561..6ef86e7923a7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -67,7 +67,7 @@ void mock_context_close(struct i915_gem_context *ctx)
 
 void mock_init_contexts(struct drm_i915_private *i915)
 {
-	init_contexts(i915);
+	init_contexts(&i915->gem.contexts);
 }
 
 struct i915_gem_context *
@@ -76,8 +76,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 	struct i915_gem_context *ctx;
 	int err;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 35a40c2820a2..107eb03d6dd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -226,7 +226,13 @@ intel_context_init(struct intel_context *ce,
 	kref_init(&ce->ref);
 
 	ce->gem_context = ctx;
-	ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
+	rcu_read_lock();
+	ce->vm = rcu_dereference(ctx->vm);
+	if (ce->vm && !kref_get_unless_zero(&ce->vm->ref))
+		ce->vm = NULL;
+	if (!ce->vm)
+		ce->vm = i915_vm_get(&engine->gt->ggtt->vm);
+	rcu_read_unlock();
 	if (ctx->timeline)
 		ce->timeline = intel_timeline_get(ctx->timeline);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 890cd9ab07a0..b4637f0c8def 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -984,6 +984,11 @@ __execlists_schedule_in(struct i915_request *rq)
 		ce->lrc_desc |=
 			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
 			GEN11_SW_CTX_ID_SHIFT;
+
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
+/* in Gen12 ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID        (GEN11_MAX_CONTEXT_HW_ID - 1)
+
 		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
 	}
 
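[ The GEN11/12 HW-ID limits gain a local definition here, next to
  their only remaining user; their old home in i915_drv.h is removed
  later in this patch. A condensed sketch of the tagging this
  BUILD_BUG_ON guards (not the literal driver code): each submission
  stamps a small rolling per-engine tag into the descriptor's SW
  context ID field, unless perf/OA pinned a fixed ce->tag (see the
  i915_perf.c hunk in the previous patch):

	if (!ce->tag)
		ce->lrc_desc |=
			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
			GEN11_SW_CTX_ID_SHIFT;
]
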
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 86cffbb0a9cb..7c838a57e174 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -155,13 +155,9 @@ static int live_context_size(void *arg)
 	 * HW tries to write past the end of one.
 	 */
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	fixme = kernel_context(gt->i915);
-	if (IS_ERR(fixme)) {
-		err = PTR_ERR(fixme);
-		goto unlock;
-	}
+	if (IS_ERR(fixme))
+		return PTR_ERR(fixme);
 
 	for_each_engine(engine, gt->i915, id) {
 		struct {
@@ -201,8 +197,6 @@ static int live_context_size(void *arg)
 	}
 
 	kernel_context_close(fixme);
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	return err;
 }
 
@@ -305,12 +299,10 @@ static int live_active_context(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	fixme = live_context(gt->i915, file);
 	if (IS_ERR(fixme)) {
 		err = PTR_ERR(fixme);
-		goto unlock;
+		goto out_file;
 	}
 
 	for_each_engine(engine, gt->i915, id) {
@@ -323,8 +315,7 @@ static int live_active_context(void *arg)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
 	mock_file_free(gt->i915, file);
 	return err;
 }
@@ -418,12 +409,10 @@ static int live_remote_context(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
-
 	fixme = live_context(gt->i915, file);
 	if (IS_ERR(fixme)) {
 		err = PTR_ERR(fixme);
-		goto unlock;
+		goto out_file;
 	}
 
 	for_each_engine(engine, gt->i915, id) {
@@ -436,8 +425,7 @@ static int live_remote_context(void *arg)
 			break;
 	}
 
-unlock:
-	mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
 	mock_file_free(gt->i915, file);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index b0ca30220193..ecb41cd2bb4f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -58,9 +58,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt)
 	memset(h, 0, sizeof(*h));
 	h->gt = gt;
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	h->ctx = kernel_context(gt->i915);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(h->ctx))
 		return PTR_ERR(h->ctx);
 
@@ -133,7 +131,8 @@ static struct i915_request *
 hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct intel_gt *gt = h->gt;
-	struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(h->ctx->vm, true) ?: &engine->gt->ggtt->vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
@@ -382,9 +381,7 @@ static int igt_reset_nop(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out;
@@ -458,9 +455,7 @@ static int igt_reset_nop_engine(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out;
@@ -705,9 +700,7 @@ static int active_engine(void *data)
 		return PTR_ERR(file);
 
 	for (count = 0; count < ARRAY_SIZE(ctx); count++) {
-		mutex_lock(&engine->i915->drm.struct_mutex);
 		ctx[count] = live_context(engine->i915, file);
-		mutex_unlock(&engine->i915->drm.struct_mutex);
 		if (IS_ERR(ctx[count])) {
 			err = PTR_ERR(ctx[count]);
 			while (--count)
@@ -1298,18 +1291,18 @@ static int igt_reset_evict_ppgtt(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&gt->i915->drm.struct_mutex);
 	ctx = live_context(gt->i915, file);
-	mutex_unlock(&gt->i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out;
 	}
 
 	err = 0;
-	if (ctx->vm) /* aliasing == global gtt locking, covered above */
-		err = __igt_reset_evict_vma(gt, ctx->vm,
+	if (rcu_access_pointer(ctx->vm)) {
+		/* aliasing == global gtt locking, covered above */
+		err = __igt_reset_evict_vma(gt, rcu_dereference_protected(ctx->vm, true),
 					    evict_vma, EXEC_OBJECT_WRITE);
+	}
 
 out:
 	mock_file_free(gt->i915, file);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 7d3d9c53c416..0357cf643e3d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1635,7 +1635,9 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	int err = 0;
 
 	if (batch) {
-		vma = i915_vma_instance(batch, ctx->vm, NULL);
+		vma = i915_vma_instance(batch,
+					rcu_dereference_protected(ctx->vm, true),
+					NULL);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 4ee2e2babd0d..d17976d9fb91 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -33,6 +33,8 @@ struct wa_lists {
 	} engine[I915_NUM_ENGINES];
 };
 
+#define ctx_vm(ctx) rcu_dereference_protected((ctx)->vm, true)
+
 static void
 reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
 {
@@ -362,7 +364,7 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	vma = i915_vma_instance(obj, ctx_vm(ctx), NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
@@ -468,7 +470,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 	int err = 0, i, v;
 	u32 *cs, *results;
 
-	scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
+	scratch = create_scratch(ctx_vm(ctx), 2 * ARRAY_SIZE(values) + 1);
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
 
@@ -1018,14 +1020,14 @@ static int live_isolated_whitelist(void *arg)
 			goto err;
 		}
 
-		client[i].scratch[0] = create_scratch(c->vm, 1024);
+		client[i].scratch[0] = create_scratch(ctx_vm(c), 1024);
 		if (IS_ERR(client[i].scratch[0])) {
 			err = PTR_ERR(client[i].scratch[0]);
 			kernel_context_close(c);
 			goto err;
 		}
 
-		client[i].scratch[1] = create_scratch(c->vm, 1024);
+		client[i].scratch[1] = create_scratch(ctx_vm(c), 1024);
 		if (IS_ERR(client[i].scratch[1])) {
 			err = PTR_ERR(client[i].scratch[1]);
 			i915_vma_unpin_and_release(&client[i].scratch[0], 0);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 03f567084548..ce5cff01f44c 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -365,7 +365,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 					  struct i915_gem_context *ctx)
 {
 	struct intel_vgpu_mm *mm = workload->shadow_mm;
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm);
+	struct i915_ppgtt *ppgtt =
+		i915_vm_to_ppgtt(rcu_dereference_protected(ctx->vm, true));
 	int i = 0;
 
 	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
@@ -1230,20 +1231,18 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	struct intel_vgpu_submission *s = &vgpu->submission;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
+	struct i915_ppgtt *ppgtt;
 	enum intel_engine_id i;
 	int ret;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX);
-	if (IS_ERR(ctx)) {
-		ret = PTR_ERR(ctx);
-		goto out_unlock;
-	}
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
 
 	i915_gem_context_set_force_single_submission(ctx);
 
-	i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
+	ppgtt = i915_vm_to_ppgtt(rcu_dereference_protected(ctx->vm, true));
+	i915_context_ppgtt_root_save(s, ppgtt);
 
 	for_each_engine(engine, i915, i) {
 		struct intel_context *ce;
@@ -1289,11 +1288,10 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
 	i915_gem_context_put(ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 out_shadow_ctx:
-	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm));
+	i915_context_ppgtt_root_restore(s, ppgtt);
 	for_each_engine(engine, i915, i) {
 		if (IS_ERR(s->shadow[i]))
 			break;
@@ -1302,8 +1300,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		intel_context_put(s->shadow[i]);
 	}
 	i915_gem_context_put(ctx);
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 454de27c4912..6081a4539e1e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -316,12 +316,18 @@ static void print_context_stats(struct seq_file *m,
 				struct drm_i915_private *i915)
 {
 	struct file_stats kstats = {};
-	struct i915_gem_context *ctx;
+	struct i915_gem_context *ctx, *cn;
 
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		for_each_gem_engine(ce,
 				    i915_gem_context_lock_engines(ctx), it) {
 			intel_context_lock_pinned(ce);
@@ -338,7 +344,9 @@ static void print_context_stats(struct seq_file *m,
 		i915_gem_context_unlock_engines(ctx);
 
 		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-			struct file_stats stats = { .vm = ctx->vm, };
+			struct file_stats stats = {
+				.vm = rcu_access_pointer(ctx->vm),
+			};
 			struct drm_file *file = ctx->file_priv->file;
 			struct task_struct *task;
 			char name[80];
@@ -355,7 +363,12 @@ static void print_context_stats(struct seq_file *m,
 
 			print_file_stats(m, name, stats);
 		}
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
 	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	print_file_stats(m, "[k]contexts", kstats);
 }
@@ -363,7 +376,6 @@ static void print_context_stats(struct seq_file *m,
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
-	int ret;
 
 	seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
 		   i915->mm.shrink_count,
@@ -372,12 +384,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 
 	seq_putc(m, '\n');
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	print_context_stats(m, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
@@ -1493,19 +1500,19 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 
 static int i915_context_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct i915_gem_context *ctx, *cn;
 
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		seq_puts(m, "HW context ");
 		if (ctx->pid) {
 			struct task_struct *task;
@@ -1540,9 +1547,12 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		i915_gem_context_unlock_engines(ctx);
 
 		seq_putc(m, '\n');
-	}
 
-	mutex_unlock(&dev->struct_mutex);
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
+	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 949f09bca8ba..9ac8d3b234c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1662,10 +1662,8 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	mutex_lock(&dev->struct_mutex);
 	i915_gem_context_close(file);
 	i915_gem_release(dev, file);
-	mutex_unlock(&dev->struct_mutex);
 
 	kfree_rcu(file_priv, rcu);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 79ddb44b834d..c91bdd8bd127 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1536,25 +1536,6 @@ struct drm_i915_private {
 	int audio_power_refcount;
 	u32 audio_freq_cntrl;
 
-	struct {
-		struct mutex mutex;
-		struct list_head list;
-		struct llist_head free_list;
-		struct work_struct free_work;
-
-		/* The hw wants to have a stable context identifier for the
-		 * lifetime of the context (for OA, PASID, faults, etc).
-		 * This is limited in execlists to 21 bits.
-		 */
-		struct ida hw_ida;
-#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
-#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
-/* in Gen12 ID 0x7FF is reserved to indicate idle */
-#define GEN12_MAX_CONTEXT_HW_ID	(GEN11_MAX_CONTEXT_HW_ID - 1)
-		struct list_head hw_id_list;
-	} contexts;
-
 	u32 fdi_rx_config;
 
 	/* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */
@@ -1710,6 +1691,14 @@ struct drm_i915_private {
 
 	struct {
 		struct notifier_block pm_notifier;
+
+		struct i915_gem_contexts {
+			spinlock_t lock; /* locks list */
+			struct list_head list;
+
+			struct llist_head free_list;
+			struct work_struct free_work;
+		} contexts;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5a664bdead8c..f6db415985d5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1266,7 +1266,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
-	ret = i915_gem_contexts_init(dev_priv);
+	ret = i915_gem_init_contexts(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_scratch;
@@ -1348,7 +1348,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	}
 err_context:
 	if (ret != -EIO)
-		i915_gem_contexts_fini(dev_priv);
+		i915_gem_driver_release__contexts(dev_priv);
 err_scratch:
 	intel_gt_driver_release(&dev_priv->gt);
 err_unlock:
@@ -1416,11 +1416,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 
 void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_engines_cleanup(dev_priv);
-	i915_gem_contexts_fini(dev_priv);
+	i915_gem_driver_release__contexts(dev_priv);
 	intel_gt_driver_release(&dev_priv->gt);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_wa_list_free(&dev_priv->gt_wa_list);
 
@@ -1430,7 +1428,7 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 
 	i915_gem_drain_freed_objects(dev_priv);
 
-	WARN_ON(!list_empty(&dev_priv->contexts.list));
+	WARN_ON(!list_empty(&dev_priv->gem.contexts.list));
 }
 
 void i915_gem_init_mmio(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1d26634ca597..4dd667854d4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1366,7 +1366,9 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	if (vm->has_read_only &&
 	    vm->i915->kernel_context &&
 	    vm->i915->kernel_context->vm) {
-		struct i915_address_space *clone = vm->i915->kernel_context->vm;
+		struct i915_address_space *clone =
+			rcu_dereference_protected(vm->i915->kernel_context->vm,
+						  true);
 
 		GEM_BUG_ON(!clone->has_read_only);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ecfbc37b738b..231388d06c82 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1853,8 +1853,8 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	};
 #undef ctx_flexeuN
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
-	int i;
+	struct i915_gem_context *ctx, *cn;
+	int i, err;
 
 	for (i = 2; i < ARRAY_SIZE(regs); i++)
 		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
@@ -1877,16 +1877,27 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 * context. Contexts idle at the time of reconfiguration are not
 	 * trapped behind the barrier.
 	 */
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
-		int err;
-
+	spin_lock(&i915->gem.contexts.lock);
+	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
 		if (ctx == i915->kernel_context)
 			continue;
 
+		if (!kref_get_unless_zero(&ctx->ref))
+			continue;
+
+		spin_unlock(&i915->gem.contexts.lock);
+
 		err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
-		if (err)
+		if (err) {
+			i915_gem_context_put(ctx);
 			return err;
+		}
+
+		spin_lock(&i915->gem.contexts.lock);
+		list_safe_reset_next(ctx, cn, link);
+		i915_gem_context_put(ctx);
 	}
+	spin_unlock(&i915->gem.contexts.lock);
 
 	/*
 	 * After updating all other contexts, we need to modify ourselves.
@@ -1895,7 +1906,6 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 */
 	for_each_uabi_engine(engine, i915) {
 		struct intel_context *ce = engine->kernel_context;
-		int err;
 
 		if (engine->class != RENDER_CLASS)
 			continue;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index da0cfcc65802..2ca1013ae692 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -145,9 +145,9 @@ static const struct attribute_group media_rc6_attr_group = {
 };
 #endif
 
-static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset)
+static int l3_access_valid(struct drm_i915_private *i915, loff_t offset)
 {
-	if (!HAS_L3_DPF(dev_priv))
+	if (!HAS_L3_DPF(i915))
 		return -EPERM;
 
 	if (offset % 4 != 0)
@@ -165,31 +165,24 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 	     loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	int slice = (int)(uintptr_t)attr->private;
 	int ret;
 
-	count = round_down(count, 4);
-
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
+	count = round_down(count, 4);
 	count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count);
+	memset(buf, 0, count);
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
-	if (dev_priv->l3_parity.remap_info[slice])
+	spin_lock(&i915->gem.contexts.lock);
+	if (i915->l3_parity.remap_info[slice])
 		memcpy(buf,
-		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
+		       i915->l3_parity.remap_info[slice] + offset / 4,
 		       count);
-	else
-		memset(buf, 0, count);
-
-	mutex_unlock(&dev->struct_mutex);
+	spin_unlock(&i915->gem.contexts.lock);
 
 	return count;
 }
@@ -200,46 +193,46 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	      loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	struct i915_gem_context *ctx;
 	int slice = (int)(uintptr_t)attr->private;
-	u32 **remap_info;
+	u32 *remap_info, *freeme = NULL;
 	int ret;
 
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
+	remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
+	if (!remap_info)
+		return -ENOMEM;
 
-	remap_info = &dev_priv->l3_parity.remap_info[slice];
-	if (!*remap_info) {
-		*remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
-		if (!*remap_info) {
-			ret = -ENOMEM;
-			goto out;
-		}
+	spin_lock(&i915->gem.contexts.lock);
+
+	if (i915->l3_parity.remap_info[slice]) {
+		freeme = remap_info;
+		remap_info = i915->l3_parity.remap_info[slice];
+	} else {
+		i915->l3_parity.remap_info[slice] = remap_info;
 	}
 
-	/* TODO: Ideally we really want a GPU reset here to make sure errors
-	 * aren't propagated. Since I cannot find a stable way to reset the GPU
-	 * at this point it is left as a TODO.
-	*/
-	memcpy(*remap_info + (offset/4), buf, count);
+	count = round_down(count, 4);
+	memcpy(remap_info + offset / 4, buf, count);
 
 	/* NB: We defer the remapping until we switch to the context */
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link)
-		ctx->remap_slice |= (1<<slice);
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link)
+		ctx->remap_slice |= BIT(slice);
 
-	ret = count;
+	spin_unlock(&i915->gem.contexts.lock);
+	kfree(freeme);
 
-out:
-	mutex_unlock(&dev->struct_mutex);
+	/*
+	 * TODO: Ideally we really want a GPU reset here to make sure errors
+	 * aren't propagated. Since I cannot find a stable way to reset the GPU
+	 * at this point it is left as a TODO.
+	 */
 
-	return ret;
+	return count;
 }
 
 static const struct bin_attribute dpf_attrs = {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 1f2cf6cfafb5..7ef7a1e1664c 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -952,7 +952,7 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->vm = ctx->vm;
+			__entry->vm = rcu_access_pointer(ctx->vm);
 	),
 
 	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 0346c3e5b6b6..bfa40a5b6d98 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -138,11 +138,9 @@ static int igt_gem_suspend(void *arg)
 		return PTR_ERR(file);
 
 	err = -ENOMEM;
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (!IS_ERR(ctx))
 		err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err)
 		goto out;
 
@@ -157,9 +155,7 @@ static int igt_gem_suspend(void *arg)
 
 	pm_resume(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 out:
 	mock_file_free(i915, file);
 	return err;
@@ -177,11 +173,9 @@ static int igt_gem_hibernate(void *arg)
 		return PTR_ERR(file);
 
 	err = -ENOMEM;
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (!IS_ERR(ctx))
 		err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err)
 		goto out;
 
@@ -196,9 +190,7 @@ static int igt_gem_hibernate(void *arg)
 
 	pm_resume(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = switch_to_context(i915, ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 out:
 	mock_file_free(i915, file);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index f39f0282e78c..0af9a58d011d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -473,7 +473,6 @@ static int igt_evict_contexts(void *arg)
 		}
 
 		count = 0;
-		mutex_lock(&i915->drm.struct_mutex);
 		onstack_fence_init(&fence);
 		do {
 			struct i915_request *rq;
@@ -510,8 +509,6 @@ static int igt_evict_contexts(void *arg)
 			count++;
 			err = 0;
 		} while(1);
-		mutex_unlock(&i915->drm.struct_mutex);
-
 		onstack_fence_fini(&fence);
 		pr_info("Submitted %lu contexts/requests on %s\n",
 			count, engine->name);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 9939dd40c6a8..bdd336a52f90 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1246,6 +1246,7 @@ static int exercise_mock(struct drm_i915_private *i915,
 				     unsigned long end_time))
 {
 	const u64 limit = totalram_pages() << PAGE_SHIFT;
+	struct i915_address_space *vm;
 	struct i915_gem_context *ctx;
 	IGT_TIMEOUT(end_time);
 	int err;
@@ -1254,7 +1255,8 @@ static int exercise_mock(struct drm_i915_private *i915,
 	if (!ctx)
 		return -ENOMEM;
 
-	err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time);
+	vm = rcu_dereference_protected(ctx->vm, true);
+	err = func(i915, vm, 0, min(vm->total, limit), end_time);
 
 	mock_context_close(ctx);
 	return err;
@@ -1798,7 +1800,7 @@ static int igt_cs_tlb(void *arg)
 		goto out_unlock;
 	}
 
-	vm = ctx->vm;
+	vm = rcu_dereference_protected(ctx->vm, true);
 	if (!vm)
 		goto out_unlock;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index d7d68c6a6bd5..247e3ba61948 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -181,9 +181,7 @@ static int igt_request_rewind(void *arg)
 	struct intel_context *ce;
 	int err = -EINVAL;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx[0] = mock_context(i915, "A");
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
@@ -197,9 +195,7 @@ static int igt_request_rewind(void *arg)
 	i915_request_get(request);
 	i915_request_add(request);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx[1] = mock_context(i915, "B");
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
 	GEM_BUG_ON(IS_ERR(ce));
@@ -438,9 +434,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
 	}
 
 	for (n = 0; n < t.ncontexts; n++) {
-		mutex_lock(&t.engine->i915->drm.struct_mutex);
 		t.contexts[n] = mock_context(t.engine->i915, "mock");
-		mutex_unlock(&t.engine->i915->drm.struct_mutex);
 		if (!t.contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
@@ -734,7 +728,8 @@ static int live_empty_request(void *arg)
 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx = i915->kernel_context;
-	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_address_space *vm =
+		rcu_dereference_protected(ctx->vm, true) ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	const int gen = INTEL_GEN(i915);
 	struct i915_vma *vma;
@@ -1220,9 +1215,7 @@ static int live_breadcrumbs_smoketest(void *arg)
 	}
 
 	for (n = 0; n < t[0].ncontexts; n++) {
-		mutex_lock(&i915->drm.struct_mutex);
 		t[0].contexts[n] = live_context(i915, file);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (!t[0].contexts[n]) {
 			ret = -ENOMEM;
 			goto out_contexts;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ac1ff558eb90..f6e6b0aae38c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -38,7 +38,7 @@ static bool assert_vma(struct i915_vma *vma,
 {
 	bool ok = true;
 
-	if (vma->vm != ctx->vm) {
+	if (vma->vm != rcu_access_pointer(ctx->vm)) {
 		pr_err("VMA created with wrong VM\n");
 		ok = false;
 	}
@@ -113,7 +113,8 @@ static int create_vmas(struct drm_i915_private *i915,
 	list_for_each_entry(obj, objects, st_link) {
 		for (pinned = 0; pinned <= 1; pinned++) {
 			list_for_each_entry(ctx, contexts, link) {
-				struct i915_address_space *vm = ctx->vm;
+				struct i915_address_space *vm =
+					rcu_dereference_protected(ctx->vm, true);
 				struct i915_vma *vma;
 				int err;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 4e6cde0d4859..335f37ba98de 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -59,11 +59,9 @@ static void mock_device_release(struct drm_device *dev)
 
 	i915_gem_drain_workqueue(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
-	i915_gem_contexts_fini(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_driver_release__contexts(i915);
 
 	intel_timelines_fini(i915);
 
@@ -206,7 +204,7 @@ struct drm_i915_private *mock_gem_device(void)
 	return i915;
 
 err_context:
-	i915_gem_contexts_fini(i915);
+	i915_gem_driver_release__contexts(i915);
 err_engine:
 	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
-- 
2.23.0

* [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (21 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 23/30] drm/i915: Move context management under GEM Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info Chris Wilson
                   ` (11 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

The overlay uses the modeset mutex to control itself and only required
the struct_mutex for requests; that dependence is now obsolete.
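
For illustration only, the disable path then leans on the modeset
locking alone; a minimal sketch of the resulting call site (the
verbatim change is in the diff below):

  static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
  {
          /* Serialised by the modeset locks held by our caller. */
          if (intel_crtc->overlay)
                  (void) intel_overlay_switch_off(intel_crtc->overlay);
  }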

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c |  7 +------
 drivers/gpu/drm/i915/display/intel_overlay.c | 13 -------------
 drivers/gpu/drm/i915/gt/intel_reset.c        |  4 ----
 3 files changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index cbc525103c69..891bcbba1f6b 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -5738,13 +5738,8 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 
 static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc)
 {
-	if (intel_crtc->overlay) {
-		struct drm_device *dev = intel_crtc->base.dev;
-
-		mutex_lock(&dev->struct_mutex);
+	if (intel_crtc->overlay)
 		(void) intel_overlay_switch_off(intel_crtc->overlay);
-		mutex_unlock(&dev->struct_mutex);
-	}
 
 	/* Let userspace switch the overlay on again. In most cases userspace
 	 * has to recompute where to put it anyway.
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index e12e1a753af0..daea112cbb87 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -439,8 +439,6 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 	struct i915_request *rq;
 	u32 *cs;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	/*
 	 * Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -751,7 +749,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	struct i915_vma *vma;
 	int ret, tmp_width;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
 	ret = intel_overlay_release_old_vid(overlay);
@@ -852,7 +849,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	struct drm_i915_private *dev_priv = overlay->i915;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
@@ -1068,11 +1064,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 
 	if (!(params->flags & I915_OVERLAY_ENABLE)) {
 		drm_modeset_lock_all(dev);
-		mutex_lock(&dev->struct_mutex);
-
 		ret = intel_overlay_switch_off(overlay);
-
-		mutex_unlock(&dev->struct_mutex);
 		drm_modeset_unlock_all(dev);
 
 		return ret;
@@ -1088,7 +1080,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	drm_modeset_lock_all(dev);
-	mutex_lock(&dev->struct_mutex);
 
 	if (i915_gem_object_is_tiled(new_bo)) {
 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
@@ -1152,14 +1143,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 	if (ret != 0)
 		goto out_unlock;
 
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 	i915_gem_object_put(new_bo);
 
 	return 0;
 
 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 	i915_gem_object_put(new_bo);
 
@@ -1233,7 +1222,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 	}
 
 	drm_modeset_lock_all(dev);
-	mutex_lock(&dev->struct_mutex);
 
 	ret = -EINVAL;
 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
@@ -1290,7 +1278,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 
 	ret = 0;
 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 806b668d89aa..dd372b372ee6 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1286,10 +1286,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
 	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
 		return -EIO;
 
-	/* XXX intel_reset_finish() still takes struct_mutex!!! */
-	if (mutex_is_locked(&gt->i915->drm.struct_mutex))
-		return -EAGAIN;
-
 	if (wait_event_interruptible(gt->reset.queue,
 				     !test_bit(I915_RESET_BACKOFF,
 					       &gt->reset.flags)))
-- 
2.23.0

* [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (22 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion Chris Wilson
                   ` (10 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

struct_mutex protects nothing that is accessed for the intel_framebuffer,
so the framebuffer's own locking had better be sufficient.
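
For reference, the walk over the framebuffers is already guarded by the
lock that owns them; a rough sketch of the locking that matters (the
fb_lock is visible in the function below):

  struct drm_framebuffer *drm_fb;

  mutex_lock(&dev->mode_config.fb_lock);
  drm_for_each_fb(drm_fb, dev) {
          /* describe the framebuffer; fb_lock owns this list */
  }
  mutex_unlock(&dev->mode_config.fb_lock);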

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6081a4539e1e..00c5e61bdc3a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1448,11 +1448,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_framebuffer *fbdev_fb = NULL;
 	struct drm_framebuffer *drm_fb;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) {
@@ -1487,7 +1482,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 	}
 	mutex_unlock(&dev->mode_config.fb_lock);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-- 
2.23.0

* [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (23 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore Chris Wilson
                   ` (9 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

Holding struct_mutex around the read of a BIOS blob serves no purpose.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 00c5e61bdc3a..6a7e6bfa5818 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1414,21 +1414,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 static int i915_opregion(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_opregion *opregion = &dev_priv->opregion;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out;
+	struct intel_opregion *opregion = &node_to_i915(m->private)->opregion;
 
 	if (opregion->header)
 		seq_write(m, opregion->header, OPREGION_SIZE);
 
-	mutex_unlock(&dev->struct_mutex);
-
-out:
 	return 0;
 }
 
-- 
2.23.0

* [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (24 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 28/30] drm/i915/selftests: Drop vestigial struct_mutex guards Chris Wilson
                   ` (8 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

struct_mutex provides no serialisation of the registers and data
structures being saved and restored across suspend/resume. It is
completely superfluous here.
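
The state in question is a straight register dump into dev_priv->regfile
using plain mmio accessors, e.g. (one line from the existing save path):

  /* A plain register read into the regfile; no GEM state is touched. */
  dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i));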

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_suspend.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 2b2086def0f1..8812cdd9007f 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -66,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int i;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	i915_save_display(dev_priv);
 
 	if (IS_GEN(dev_priv, 4))
@@ -101,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv)
 			dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i));
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	return 0;
 }
 
@@ -111,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int i;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	if (IS_GEN(dev_priv, 4))
 		pci_write_config_word(pdev, GCDGMBUS,
 				      dev_priv->regfile.saveGCDGMBUS);
@@ -146,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 			I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]);
 	}
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	intel_gmbus_reset(dev_priv);
 
 	return 0;
-- 
2.23.0

* [PATCH 28/30] drm/i915/selftests: Drop vestigial struct_mutex guards
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (25 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:19 ` [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation Chris Wilson
                   ` (7 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need struct_mutex to serialise request emission, so remove
it from the gt selftests.
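
Requests are now serialised by each context's own timeline, so the
selftests can emit them directly; a minimal sketch, assuming a pinned
intel_context *ce:

  struct i915_request *rq;

  rq = i915_request_create(ce); /* locks ce's timeline */
  if (IS_ERR(rq))
          return PTR_ERR(rq);
  i915_request_add(rq); /* unlocks it; no struct_mutex required */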

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  15 +-
 .../drm/i915/gem/selftests/i915_gem_context.c |   4 -
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 -
 .../drm/i915/gem/selftests/i915_gem_phys.c    |   2 -
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 148 +++---------------
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  11 +-
 drivers/gpu/drm/i915/gt/uc/selftest_guc.c     |   4 -
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   2 -
 8 files changed, 27 insertions(+), 161 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index e204e653b459..75e717f7bb5a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1633,7 +1633,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
 	mkwrite_device_info(dev_priv)->ppgtt_size = 48;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	ppgtt = i915_ppgtt_create(dev_priv);
 	if (IS_ERR(ppgtt)) {
 		err = PTR_ERR(ppgtt);
@@ -1659,9 +1658,7 @@ int i915_gem_huge_page_mock_selftests(void)
 	i915_vm_put(&ppgtt->vm);
 
 out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	drm_dev_put(&dev_priv->drm);
-
 	return err;
 }
 
@@ -1677,7 +1674,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 	};
 	struct drm_file *file;
 	struct i915_gem_context *ctx;
-	intel_wakeref_t wakeref;
 	int err;
 
 	if (!HAS_PPGTT(i915)) {
@@ -1692,13 +1688,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-		goto out_unlock;
+		goto out_file;
 	}
 
 	if (rcu_access_pointer(ctx->vm))
@@ -1706,11 +1699,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 
 	err = i915_subtests(tests, ctx);
 
-out_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
+out_file:
 	mock_file_free(i915, file);
-
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index ffedc67f5c3d..15e81095b5d6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -307,9 +307,7 @@ static int live_parallel_switch(void *arg)
 		struct igt_live_test t;
 		int n;
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = igt_live_test_begin(&t, i915, __func__, "");
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 
@@ -341,10 +339,8 @@ static int live_parallel_switch(void *arg)
 			data[n].tsk = NULL;
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		if (igt_live_test_end(&t))
 			err = -EIO;
-		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
 out:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 1cd25cfd0246..cfa52c525691 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -669,9 +669,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto out;
 		}
 
-		mutex_lock(&i915->drm.struct_mutex);
 		err = make_obj_busy(obj);
-		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
 			goto err_obj;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index 94a15e3f6db8..34932871b3a5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -25,9 +25,7 @@ static int mock_phys_object(void *arg)
 		goto out;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
 	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (err) {
 		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
 		goto out_obj;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 0357cf643e3d..177cb7f3cf92 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -26,17 +26,13 @@ static int live_sanitycheck(void *arg)
 	struct i915_gem_context *ctx;
 	struct intel_context *ce;
 	struct igt_spinner spin;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	ctx = kernel_context(i915);
 	if (!ctx)
@@ -73,9 +69,6 @@ static int live_sanitycheck(void *arg)
 	kernel_context_close(ctx);
 err_spin:
 	igt_spinner_fini(&spin);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -406,7 +399,6 @@ static int live_timeslice_preempt(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	void *vaddr;
 	int err = 0;
@@ -421,14 +413,9 @@ static int live_timeslice_preempt(void *arg)
 	 * ready task.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto err_unlock;
-	}
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
 
 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
@@ -473,10 +460,6 @@ static int live_timeslice_preempt(void *arg)
 	i915_gem_object_unpin_map(obj);
 err_obj:
 	i915_gem_object_put(obj);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	return err;
 }
 
@@ -488,7 +471,6 @@ static int live_busywait_preempt(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	u32 *map;
 
@@ -497,12 +479,9 @@ static int live_busywait_preempt(void *arg)
 	 * preempt the busywaits used to synchronise between rings.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
-		goto err_unlock;
+		return -ENOMEM;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
@@ -656,9 +635,6 @@ static int live_busywait_preempt(void *arg)
 	kernel_context_close(ctx_lo);
 err_ctx_hi:
 	kernel_context_close(ctx_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -687,7 +663,6 @@ static int live_preempt(void *arg)
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -696,11 +671,8 @@ static int live_preempt(void *arg)
 	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
 		pr_err("Logical preemption supported, but not exposed\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (igt_spinner_init(&spin_lo, &i915->gt))
 		goto err_spin_hi;
@@ -780,9 +752,6 @@ static int live_preempt(void *arg)
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -794,17 +763,13 @@ static int live_late_preempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (igt_spinner_init(&spin_lo, &i915->gt))
 		goto err_spin_hi;
@@ -886,9 +851,6 @@ static int live_late_preempt(void *arg)
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -933,7 +895,6 @@ static int live_nopreempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -944,11 +905,8 @@ static int live_nopreempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &a))
-		goto err_unlock;
+		return -ENOMEM;
 	if (preempt_client_init(i915, &b))
 		goto err_client_a;
 	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
@@ -1022,9 +980,6 @@ static int live_nopreempt(void *arg)
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1044,7 +999,6 @@ static int live_suppress_self_preempt(void *arg)
 	};
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -1063,11 +1017,8 @@ static int live_suppress_self_preempt(void *arg)
 	if (intel_vgpu_active(i915))
 		return 0; /* GVT forces single port & request submission */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &a))
-		goto err_unlock;
+		return -ENOMEM;
 	if (preempt_client_init(i915, &b))
 		goto err_client_a;
 
@@ -1148,9 +1099,6 @@ static int live_suppress_self_preempt(void *arg)
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1220,7 +1168,6 @@ static int live_suppress_wait_preempt(void *arg)
 	struct preempt_client client[4];
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	int i;
 
@@ -1233,11 +1180,8 @@ static int live_suppress_wait_preempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
-		goto err_unlock;
+		return -ENOMEM;
 	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
 		goto err_client_0;
 	if (preempt_client_init(i915, &client[2])) /* head of queue */
@@ -1323,9 +1267,6 @@ static int live_suppress_wait_preempt(void *arg)
 	preempt_client_fini(&client[1]);
 err_client_0:
 	preempt_client_fini(&client[0]);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1342,7 +1283,6 @@ static int live_chain_preempt(void *arg)
 	struct intel_engine_cs *engine;
 	struct preempt_client hi, lo;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -1354,11 +1294,8 @@ static int live_chain_preempt(void *arg)
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (preempt_client_init(i915, &hi))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (preempt_client_init(i915, &lo))
 		goto err_client_hi;
@@ -1469,9 +1406,6 @@ static int live_chain_preempt(void *arg)
 	preempt_client_fini(&lo);
 err_client_hi:
 	preempt_client_fini(&hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
@@ -1489,7 +1423,6 @@ static int live_preempt_hang(void *arg)
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -1498,11 +1431,8 @@ static int live_preempt_hang(void *arg)
 	if (!intel_has_reset_engine(&i915->gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
 	if (igt_spinner_init(&spin_hi, &i915->gt))
-		goto err_unlock;
+		return -ENOMEM;
 
 	if (igt_spinner_init(&spin_lo, &i915->gt))
 		goto err_spin_hi;
@@ -1594,9 +1524,6 @@ static int live_preempt_hang(void *arg)
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1686,11 +1613,9 @@ static int smoke_crescendo_thread(void *arg)
 		struct i915_gem_context *ctx = smoke_context(smoke);
 		int err;
 
-		mutex_lock(&smoke->i915->drm.struct_mutex);
 		err = smoke_submit(smoke,
 				   ctx, count % I915_PRIORITY_MAX,
 				   smoke->batch);
-		mutex_unlock(&smoke->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -1711,8 +1636,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 	unsigned long count;
 	int err = 0;
 
-	mutex_unlock(&smoke->i915->drm.struct_mutex);
-
 	for_each_engine(engine, smoke->i915, id) {
 		arg[id] = *smoke;
 		arg[id].engine = engine;
@@ -1745,8 +1668,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 		put_task_struct(tsk[id]);
 	}
 
-	mutex_lock(&smoke->i915->drm.struct_mutex);
-
 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
 		count, flags,
 		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
@@ -1789,7 +1710,6 @@ static int live_preempt_smoke(void *arg)
 		.ncontext = 1024,
 	};
 	const unsigned int phase[] = { 0, BATCH };
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	int err = -ENOMEM;
 	u32 *cs;
@@ -1804,13 +1724,10 @@ static int live_preempt_smoke(void *arg)
 	if (!smoke.contexts)
 		return -ENOMEM;
 
-	mutex_lock(&smoke.i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
 	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
 		err = PTR_ERR(smoke.batch);
-		goto err_unlock;
+		goto err_free;
 	}
 
 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1857,9 +1774,7 @@ static int live_preempt_smoke(void *arg)
 
 err_batch:
 	i915_gem_object_put(smoke.batch);
-err_unlock:
-	intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
-	mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
 	kfree(smoke.contexts);
 
 	return err;
@@ -1997,19 +1912,17 @@ static int live_virtual_engine(void *arg)
 	struct intel_gt *gt = &i915->gt;
 	enum intel_engine_id id;
 	unsigned int class, inst;
-	int err = -ENODEV;
+	int err;
 
 	if (USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for_each_engine(engine, i915, id) {
 		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
 		if (err) {
 			pr_err("Failed to wrap engine %s: err=%d\n",
 			       engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 	}
 
@@ -2030,17 +1943,15 @@ static int live_virtual_engine(void *arg)
 			err = nop_virtual_engine(i915, siblings, nsibling,
 						 n, 0);
 			if (err)
-				goto out_unlock;
+				return err;
 		}
 
 		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int mask_virtual_engine(struct drm_i915_private *i915,
@@ -2119,9 +2030,6 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	}
 
 	err = igt_live_test_end(&t);
-	if (err)
-		goto out;
-
 out:
 	if (igt_flush_test(i915))
 		err = -EIO;
@@ -2144,13 +2052,11 @@ static int live_virtual_mask(void *arg)
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
 	if (USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		unsigned int nsibling;
 
@@ -2166,12 +2072,10 @@ static int live_virtual_mask(void *arg)
 
 		err = mask_virtual_engine(i915, siblings, nsibling);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 static int bond_virtual_engine(struct drm_i915_private *i915,
@@ -2322,13 +2226,11 @@ static int live_virtual_bond(void *arg)
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_gt *gt = &i915->gt;
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
 	if (USES_GUC_SUBMISSION(i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		const struct phase *p;
 		int nsibling;
@@ -2351,14 +2253,12 @@ static int live_virtual_bond(void *arg)
 			if (err) {
 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
 				       __func__, p->name, class, nsibling, err);
-				goto out_unlock;
+				return err;
 			}
 		}
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d17976d9fb91..79ebe606987c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -704,9 +704,7 @@ static int live_dirty_whitelist(void *arg)
 
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
-	mutex_unlock(&i915->drm.struct_mutex);
 	file = mock_file(i915);
-	mutex_lock(&i915->drm.struct_mutex);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
 		goto out_rpm;
@@ -728,9 +726,7 @@ static int live_dirty_whitelist(void *arg)
 	}
 
 out_file:
-	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
-	mutex_lock(&i915->drm.struct_mutex);
 out_rpm:
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	return err;
@@ -1264,14 +1260,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_gpu_reset_workarounds),
 		SUBTEST(live_engine_reset_workarounds),
 	};
-	int err;
 
 	if (intel_gt_is_wedged(&i915->gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_subtests(tests, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return err;
+	return i915_subtests(tests, i915);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bba0eafe1cdb..f927f851aadf 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -116,7 +116,6 @@ static int igt_guc_clients(void *args)
 	int err = 0;
 
 	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	guc = &dev_priv->gt.uc.guc;
@@ -190,7 +189,6 @@ static int igt_guc_clients(void *args)
 	guc_clients_enable(guc);
 unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
 
@@ -208,7 +206,6 @@ static int igt_guc_doorbells(void *arg)
 	u16 db_id;
 
 	GEM_BUG_ON(!HAS_GT_UC(dev_priv));
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	guc = &dev_priv->gt.uc.guc;
@@ -299,7 +296,6 @@ static int igt_guc_doorbells(void *arg)
 		}
 unlock:
 	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index bdd336a52f90..e00e21763fc3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1793,7 +1793,6 @@ static int igt_cs_tlb(void *arg)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = live_context(i915, file);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
@@ -2013,7 +2012,6 @@ static int igt_cs_tlb(void *arg)
 out_put_bbe:
 	i915_gem_object_put(bbe);
 out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
 	return err;
 }
-- 
2.23.0

* [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (26 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 28/30] drm/i915/selftests: Drop vestigial struct_mutex guards Chris Wilson
@ 2019-10-02 11:19 ` Chris Wilson
  2019-10-02 11:20 ` [PATCH 30/30] drm/i915: Flush idle barriers when waiting Chris Wilson
                   ` (6 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:19 UTC (permalink / raw)
  To: intel-gfx

We no longer need to placate lockdep by holding struct_mutex for our
initialisation, so don't.
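
Initialisation then simply brackets the hardware setup with forcewake;
a sketch of the resulting shape (see the diff below):

  intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
  /* ... ggtt, gt and context initialisation ... */
  intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);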

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c           | 2 --
 drivers/gpu/drm/i915/i915_gem.c                  | 9 ---------
 drivers/gpu/drm/i915/selftests/mock_gem_device.c | 7 -------
 3 files changed, 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index b32088a93232..0e97520cb1bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -172,7 +172,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
 	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
 
 	if (intel_gt_init_hw(&i915->gt))
@@ -196,7 +195,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return;
 
 err_wedged:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6db415985d5..0ddbd3a5fb8d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1249,7 +1249,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 * we hold the forcewake during initialisation these problems
 	 * just magically go away.
 	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
 
 	ret = i915_init_ggtt(dev_priv);
@@ -1319,7 +1318,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_gt;
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 
@@ -1330,15 +1328,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 * driver doesn't explode during runtime.
 	 */
 err_gt:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	intel_gt_set_wedged_on_init(&dev_priv->gt);
 	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
 	i915_gem_drain_workqueue(dev_priv);
-
-	mutex_lock(&dev_priv->drm.struct_mutex);
 err_init_hw:
 	intel_uc_fini_hw(&dev_priv->gt.uc);
 err_uc_init:
@@ -1353,7 +1347,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_gt_driver_release(&dev_priv->gt);
 err_unlock:
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	if (ret != -EIO) {
 		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
@@ -1406,10 +1399,8 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 	/* Flush any outstanding unpin_work. */
 	i915_gem_drain_workqueue(dev_priv);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uc_fini_hw(&dev_priv->gt.uc);
 	intel_uc_fini(&dev_priv->gt.uc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	i915_gem_drain_freed_objects(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 335f37ba98de..70a7026db08d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -68,10 +68,7 @@ static void mock_device_release(struct drm_device *dev)
 	drain_workqueue(i915->wq);
 	i915_gem_drain_freed_objects(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	mock_fini_ggtt(&i915->ggtt);
-	mutex_unlock(&i915->drm.struct_mutex);
-
 	destroy_workqueue(i915->wq);
 
 	i915_gemfs_fini(i915);
@@ -179,8 +176,6 @@ struct drm_i915_private *mock_gem_device(void)
 
 	intel_timelines_init(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	mock_init_ggtt(i915, &i915->ggtt);
 
 	mkwrite_device_info(i915)->engine_mask = BIT(0);
@@ -197,7 +192,6 @@ struct drm_i915_private *mock_gem_device(void)
 		goto err_context;
 
 	intel_engines_driver_register(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	WARN_ON(i915_gemfs_init(i915));
 
@@ -208,7 +202,6 @@ struct drm_i915_private *mock_gem_device(void)
 err_engine:
 	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
 	intel_timelines_fini(i915);
 	destroy_workqueue(i915->wq);
 err_drv:
-- 
2.23.0

* [PATCH 30/30] drm/i915: Flush idle barriers when waiting
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (27 preceding siblings ...)
  2019-10-02 11:19 ` [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation Chris Wilson
@ 2019-10-02 11:20 ` Chris Wilson
  2019-10-02 13:21 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore Patchwork
                   ` (5 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 11:20 UTC (permalink / raw)
  To: intel-gfx

If we do find ourselves with an idle barrier inside our i915_active
while waiting, attempt to flush it by emitting a pulse using the kernel
context.
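
The pulse is just an empty request on the engine's kernel context that
carries the idle barriers with it; roughly (a sketch of the helper
added below):

  rq = i915_request_create(engine->kernel_context);
  if (IS_ERR(rq))
          return PTR_ERR(rq);

  idle_pulse(engine, rq); /* attach the engine's idle barriers */
  i915_request_add(rq);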

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  | 14 +++++++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  1 +
 drivers/gpu/drm/i915/i915_active.c            | 21 +++++++++++++++++--
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index f68acf9118f3..e27bb7f028bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -169,3 +169,17 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
 	intel_engine_pm_put(engine);
 	return err;
 }
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = i915_request_create(engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	idle_pulse(engine, rq);
+	i915_request_add(rq);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
index 39391004554d..0c1ad0fc091d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -15,5 +15,6 @@ void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
 void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
 
 int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
 
 #endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index aa37c07004b9..98d5fe1c7e19 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -6,6 +6,7 @@
 
 #include <linux/debugobjects.h>
 
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_pm.h"
 
 #include "i915_drv.h"
@@ -435,6 +436,21 @@ static void enable_signaling(struct i915_active_fence *active)
 	dma_fence_put(fence);
 }
 
+static int flush_barrier(struct active_node *it)
+{
+	struct intel_engine_cs *engine;
+
+	if (!is_barrier(&it->base))
+		return 0;
+
+	engine = __barrier_to_engine(it);
+	smp_rmb(); /* serialise with add_active_barriers */
+	if (!is_barrier(&it->base))
+		return 0;
+
+	return intel_engine_flush_barriers(engine);
+}
+
 int i915_active_wait(struct i915_active *ref)
 {
 	struct active_node *it, *n;
@@ -448,8 +464,9 @@ int i915_active_wait(struct i915_active *ref)
 	/* Flush lazy signals */
 	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
-		if (is_barrier(&it->base)) /* unconnected idle barrier */
-			continue;
+		err = flush_barrier(it); /* unconnected idle barrier? */
+		if (err)
+			break;
 
 		enable_signaling(&it->base);
 	}
-- 
2.23.0

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (28 preceding siblings ...)
  2019-10-02 11:20 ` [PATCH 30/30] drm/i915: Flush idle barriers when waiting Chris Wilson
@ 2019-10-02 13:21 ` Patchwork
  2019-10-02 13:34 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (4 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 13:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
099f6ae3c72d drm/i915/selftests: Exercise potential false lite-restore
48a5a08e3523 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked with dma_fence_signal")'
#14: 
See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked

total: 1 errors, 0 warnings, 0 checks, 24 lines checked
e93a397fa0d2 drm/mm: Pack allocated/scanned boolean into a bitfield
6642ae7aa59e drm/i915: Only track bound elements of the GTT
24c5d7101d43 drm/i915: Mark up address spaces that may need to allocate
4780268eb9ee drm/i915: Pull i915_vma_pin under the vm->mutex
ac368d894b03 drm/i915: Push the i915_active.retire into a worker
599d9ac80be6 drm/i915: Coordinate i915_active with its own mutex
-:1339: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#1339: FILE: drivers/gpu/drm/i915/i915_active_types.h:49:
+	struct mutex mutex;

total: 0 errors, 0 warnings, 1 checks, 1481 lines checked
609a95b5ebdd drm/i915: Move idle barrier cleanup into engine-pm
3e257cd827c5 drm/i915: Drop struct_mutex from around i915_retire_requests()
b34f4178512d drm/i915: Remove the GEM idle worker
549ca7d3ccb8 drm/i915: Merge wait_for_timelines with retire_request
5f78b2d8eaa1 drm/i915/gem: Retire directly for mmap-offset shrinking
0f64ec7c4dc9 drm/i915: Move request runtime management onto gt
-:235: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#235: 
new file mode 100644

-:240: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#240: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:1:
+/*

-:241: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#241: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:2:
+ * SPDX-License-Identifier: MIT

-:369: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#369: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:1:
+/*

-:370: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#370: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 725 lines checked
d806a7455a70 drm/i915: Move global activity tracking from GEM to GT
ed2665cd47ce drm/i915: Expose engine properties via sysfs
-:68: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#68: 
new file mode 100644

-:73: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#73: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:1:
+/*

-:74: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#74: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:2:
+ * SPDX-License-Identifier: MIT

-:198: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#198: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:1:
+/*

-:199: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#199: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 158 lines checked
a1ec3bd4b994 drm/i915/execlists: Force preemption
7392453b5491 drm/i915: Mark up "sentinel" requests
a84e25080394 drm/i915/execlists: Cancel banned contexts on schedule-out
36c0ae8588b3 drm/i915: Cancel non-persistent contexts on close
-:282: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#282: 
new file mode 100644

-:287: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#287: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:1:
+/*

-:288: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#288: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:2:
+ * SPDX-License-Identifier: MIT

-:349: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#349: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:1:
+/*

-:350: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#350: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 319 lines checked
01065375a0b4 drm/i915: Replace hangcheck by heartbeats
-:236: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#236: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:69:
+					  "%s heartbeat not ticking\n",

-:253: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#253: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:86:
+					      "stopped heartbeat on %s",

-:540: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#540: 
deleted file mode 100644

total: 0 errors, 3 warnings, 0 checks, 672 lines checked
1c94e090a34e drm/i915: Remove logical HW ID
1e6d926f34c5 drm/i915: Move context management under GEM
-:1061: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#1061: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:988:
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
                                   ^

total: 0 errors, 0 warnings, 1 checks, 1717 lines checked
e9936b3ae50f drm/i915/overlay: Drop struct_mutex guard
8fc7dc0d282a drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
e472c60fd9d7 drm/i915: Remove struct_mutex guard for debugfs/opregion
e5dc8b772718 drm/i915: Drop struct_mutex from suspend state save/restore
132ce69b74db drm/i915/selftests: Drop vestigal struct_mutex guards
277ca2deb924 drm/i915: Drop struct_mutex from around GEM initialisation
b88fbb322e7a drm/i915: Flush idle barriers when waiting


* ✗ Fi.CI.SPARSE: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (29 preceding siblings ...)
  2019-10-02 13:21 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore Patchwork
@ 2019-10-02 13:34 ` Patchwork
  2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (3 subsequent siblings)
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 13:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.6.0
Commit: drm/i915/selftests: Exercise potential false lite-restore
Okay!

Commit: dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
Okay!

Commit: drm/mm: Pack allocated/scanned boolean into a bitfield
Okay!

Commit: drm/i915: Only track bound elements of the GTT
Okay!

Commit: drm/i915: Mark up address spaces that may need to allocate
Okay!

Commit: drm/i915: Pull i915_vma_pin under the vm->mutex
Okay!

Commit: drm/i915: Push the i915_active.retire into a worker
Okay!

Commit: drm/i915: Coordinate i915_active with its own mutex
Okay!

Commit: drm/i915: Move idle barrier cleanup into engine-pm
Okay!

Commit: drm/i915: Drop struct_mutex from around i915_retire_requests()
Okay!

Commit: drm/i915: Remove the GEM idle worker
Okay!

Commit: drm/i915: Merge wait_for_timelines with retire_request
Okay!

Commit: drm/i915/gem: Retire directly for mmap-offset shrinking
Okay!

Commit: drm/i915: Move request runtime management onto gt
Okay!

Commit: drm/i915: Move global activity tracking from GEM to GT
Okay!

Commit: drm/i915: Expose engine properties via sysfs
Okay!

Commit: drm/i915/execlists: Force preemption
Okay!

Commit: drm/i915: Mark up "sentinel" requests
Okay!

Commit: drm/i915/execlists: Cancel banned contexts on schedule-out
Okay!

Commit: drm/i915: Cancel non-persistent contexts on close
Okay!

Commit: drm/i915: Replace hangcheck by heartbeats
Okay!

Commit: drm/i915: Remove logical HW ID
Okay!

Commit: drm/i915: Move context management under GEM
Okay!

Commit: drm/i915/overlay: Drop struct_mutex guard
Okay!

Commit: drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
Okay!


* ✗ Fi.CI.BAT: failure for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (30 preceding siblings ...)
  2019-10-02 13:34 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-10-02 13:46 ` Patchwork
  2019-10-02 13:49   ` Chris Wilson
  2019-10-02 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2) Patchwork
                   ` (2 subsequent siblings)
  34 siblings, 1 reply; 50+ messages in thread
From: Patchwork @ 2019-10-02 13:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
URL   : https://patchwork.freedesktop.org/series/67483/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6988 -> Patchwork_14625
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14625 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14625, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14625:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-bdw-gvtdvm:      NOTRUN -> [INCOMPLETE][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-skl-guc:         [PASS][2] -> [INCOMPLETE][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-skl-guc/igt@i915_selftest@live_execlists.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-guc/igt@i915_selftest@live_execlists.html
    - fi-kbl-guc:         [PASS][4] -> [INCOMPLETE][5]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-kbl-guc/igt@i915_selftest@live_execlists.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-kbl-guc/igt@i915_selftest@live_execlists.html
    - fi-skl-6600u:       NOTRUN -> [INCOMPLETE][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6600u/igt@i915_selftest@live_execlists.html
    - fi-bsw-n3050:       [PASS][7] -> [INCOMPLETE][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
    - fi-bsw-kefka:       [PASS][9] -> [INCOMPLETE][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
    - fi-skl-6260u:       NOTRUN -> [INCOMPLETE][11]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6260u/igt@i915_selftest@live_execlists.html
    - fi-bdw-5557u:       [PASS][12] -> [INCOMPLETE][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-5557u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - fi-bdw-gvtdvm:      NOTRUN -> [FAIL][14]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-gvtdvm/igt@runner@aborted.html
    - fi-bsw-n3050:       NOTRUN -> [FAIL][15]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-n3050/igt@runner@aborted.html
    - fi-bsw-kefka:       NOTRUN -> [FAIL][16]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-kefka/igt@runner@aborted.html
    - fi-bdw-5557u:       NOTRUN -> [FAIL][17]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-5557u/igt@runner@aborted.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live_execlists:
    - {fi-tgl-u}:         NOTRUN -> [INCOMPLETE][18]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-tgl-u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - {fi-tgl-u}:         NOTRUN -> [FAIL][19]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-tgl-u/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_14625 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_switch@legacy-render:
    - fi-cml-u2:          [PASS][20] -> [INCOMPLETE][21] ([fdo#110566] / [fdo#111381])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-cml-u2/igt@gem_ctx_switch@legacy-render.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-cml-u2/igt@gem_ctx_switch@legacy-render.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][22] -> [INCOMPLETE][23] ([fdo#107718])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_flink_basic@basic:
    - fi-icl-u3:          [PASS][24] -> [DMESG-WARN][25] ([fdo#107724]) +1 similar issue
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u3/igt@gem_flink_basic@basic.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u3/igt@gem_flink_basic@basic.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-u3:          [PASS][26] -> [INCOMPLETE][27] ([fdo#107713])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u3/igt@i915_selftest@live_execlists.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u3/igt@i915_selftest@live_execlists.html
    - fi-glk-dsi:         [PASS][28] -> [INCOMPLETE][29] ([fdo#103359] / [k.org#198133])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-glk-dsi/igt@i915_selftest@live_execlists.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-glk-dsi/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_gem:
    - fi-icl-u2:          [PASS][30] -> [INCOMPLETE][31] ([fdo#107713])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u2/igt@i915_selftest@live_gem.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u2/igt@i915_selftest@live_gem.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-apl-guc:         [DMESG-WARN][32] ([fdo#109385] / [fdo#111652]) -> [PASS][33]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-apl-guc/igt@gem_exec_gttfill@basic.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-apl-guc/igt@gem_exec_gttfill@basic.html

  * igt@gem_exec_parallel@basic:
    - {fi-tgl-u2}:        [INCOMPLETE][34] -> [PASS][35]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-tgl-u2/igt@gem_exec_parallel@basic.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-tgl-u2/igt@gem_exec_parallel@basic.html

  * igt@i915_module_load@reload-with-fault-injection:
    - {fi-icl-guc}:       [DMESG-WARN][36] ([fdo#106107]) -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-guc/igt@i915_module_load@reload-with-fault-injection.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-guc/igt@i915_module_load@reload-with-fault-injection.html

  * igt@kms_chamelium@hdmi-edid-read:
    - {fi-icl-u4}:        [FAIL][38] ([fdo#111045]) -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u4/igt@kms_chamelium@hdmi-edid-read.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u4/igt@kms_chamelium@hdmi-edid-read.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-icl-u2:          [FAIL][40] ([fdo#109483]) -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
    - {fi-icl-u4}:        [FAIL][42] ([fdo#111045] / [fdo#111096]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u4/igt@kms_chamelium@hdmi-hpd-fast.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u4/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - {fi-icl-dsi}:       [DMESG-WARN][44] ([fdo#106107]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-dsi/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-dsi/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@prime_self_import@basic-llseek-size:
    - fi-icl-u3:          [DMESG-WARN][46] ([fdo#107724]) -> [PASS][47] +2 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-icl-u3/igt@prime_self_import@basic-llseek-size.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-icl-u3/igt@prime_self_import@basic-llseek-size.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109385]: https://bugs.freedesktop.org/show_bug.cgi?id=109385
  [fdo#109483]: https://bugs.freedesktop.org/show_bug.cgi?id=109483
  [fdo#110566]: https://bugs.freedesktop.org/show_bug.cgi?id=110566
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111381]: https://bugs.freedesktop.org/show_bug.cgi?id=111381
  [fdo#111652]: https://bugs.freedesktop.org/show_bug.cgi?id=111652
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (49 -> 46)
------------------------------

  Additional (4): fi-bdw-gvtdvm fi-tgl-u fi-skl-6260u fi-skl-6600u 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6988 -> Patchwork_14625

  CI-20190529: 20190529
  CI_DRM_6988: a41f194b3d8797220fd614bfd13df8752507bb27 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5209: ec639c89860b859fdf4b038c2fa8ad593bd6909e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14625: b88fbb322e7a7db062a673fd2af3b85e6aa7f36c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

b88fbb322e7a drm/i915: Flush idle barriers when waiting
277ca2deb924 drm/i915: Drop struct_mutex from around GEM initialisation
132ce69b74db drm/i915/selftests: Drop vestigal struct_mutex guards
e5dc8b772718 drm/i915: Drop struct_mutex from suspend state save/restore
e472c60fd9d7 drm/i915: Remove struct_mutex guard for debugfs/opregion
8fc7dc0d282a drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
e9936b3ae50f drm/i915/overlay: Drop struct_mutex guard
1e6d926f34c5 drm/i915: Move context management under GEM
1c94e090a34e drm/i915: Remove logical HW ID
01065375a0b4 drm/i915: Replace hangcheck by heartbeats
36c0ae8588b3 drm/i915: Cancel non-persistent contexts on close
a84e25080394 drm/i915/execlists: Cancel banned contexts on schedule-out
7392453b5491 drm/i915: Mark up "sentinel" requests
a1ec3bd4b994 drm/i915/execlists: Force preemption
ed2665cd47ce drm/i915: Expose engine properties via sysfs
d806a7455a70 drm/i915: Move global activity tracking from GEM to GT
0f64ec7c4dc9 drm/i915: Move request runtime management onto gt
5f78b2d8eaa1 drm/i915/gem: Retire directly for mmap-offset shrinking
549ca7d3ccb8 drm/i915: Merge wait_for_timelines with retire_request
b34f4178512d drm/i915: Remove the GEM idle worker
3e257cd827c5 drm/i915: Drop struct_mutex from around i915_retire_requests()
609a95b5ebdd drm/i915: Move idle barrier cleanup into engine-pm
599d9ac80be6 drm/i915: Coordinate i915_active with its own mutex
ac368d894b03 drm/i915: Push the i915_active.retire into a worker
4780268eb9ee drm/i915: Pull i915_vma_pin under the vm->mutex
24c5d7101d43 drm/i915: Mark up address spaces that may need to allocate
6642ae7aa59e drm/i915: Only track bound elements of the GTT
e93a397fa0d2 drm/mm: Pack allocated/scanned boolean into a bitfield
48a5a08e3523 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
099f6ae3c72d drm/i915/selftests: Exercise potential false lite-restore

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/index.html

* Re: ✗ Fi.CI.BAT: failure for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
  2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-10-02 13:49   ` Chris Wilson
  0 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 13:49 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-10-02 14:46:05)
> == Series Details ==
> 
> Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore
> URL   : https://patchwork.freedesktop.org/series/67483/
> State : failure
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_6988 -> Patchwork_14625
> ====================================================
> 
> Summary
> -------
> 
>   **FAILURE**
> 
>   Serious unknown changes coming with Patchwork_14625 absolutely need to be
>   verified manually.
>   
>   If you think the reported changes have nothing to do with the changes
>   introduced in Patchwork_14625, please notify your bug team to allow them
>   to document this new failure mode, which will reduce false positives in CI.
> 
>   External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/index.html
> 
> Possible new issues
> -------------------
> 
>   Here are the unknown changes that may have been introduced in Patchwork_14625:
> 
> ### IGT changes ###
> 
> #### Possible regressions ####
> 
>   * igt@i915_selftest@live_execlists:
>     - fi-bdw-gvtdvm:      NOTRUN -> [INCOMPLETE][1]
>    [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
>     - fi-skl-guc:         [PASS][2] -> [INCOMPLETE][3]
>    [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-skl-guc/igt@i915_selftest@live_execlists.html
>    [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-guc/igt@i915_selftest@live_execlists.html
>     - fi-kbl-guc:         [PASS][4] -> [INCOMPLETE][5]
>    [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-kbl-guc/igt@i915_selftest@live_execlists.html
>    [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-kbl-guc/igt@i915_selftest@live_execlists.html
>     - fi-skl-6600u:       NOTRUN -> [INCOMPLETE][6]
>    [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6600u/igt@i915_selftest@live_execlists.html
>     - fi-bsw-n3050:       [PASS][7] -> [INCOMPLETE][8]
>    [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
>    [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
>     - fi-bsw-kefka:       [PASS][9] -> [INCOMPLETE][10]
>    [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
>    [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
>     - fi-skl-6260u:       NOTRUN -> [INCOMPLETE][11]
>    [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-skl-6260u/igt@i915_selftest@live_execlists.html
>     - fi-bdw-5557u:       [PASS][12] -> [INCOMPLETE][13]
>    [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6988/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
>    [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14625/fi-bdw-5557u/igt@i915_selftest@live_execlists.html

I got bored of waiting for trybot for the unlite test results.
-Chris

* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
@ 2019-10-02 13:52   ` Bloomfield, Jon
  2019-10-02 14:05     ` Chris Wilson
  2019-10-02 14:23     ` Chris Wilson
  2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
  1 sibling, 2 replies; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 13:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 4:20 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> 
> Normally, we rely on our hangcheck to prevent persistent batches from
> hogging the GPU. However, if the user disables hangcheck, this mechanism
> breaks down. Despite our insistence that this is unsafe, the users are
> equally insistent that they want to use endless batches and will disable
> the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> with a softer mechanism, that sends a pulse down the engine to check if
> it is well. We can use the same preemptive pulse to flush an active
> persistent context off the GPU upon context close, preventing resources
> being lost and unkillable requests remaining on the GPU after process
> termination. To avoid changing the ABI and accidentally breaking
> existing userspace, we make the persistence of a context explicit and
> enable it by default (matching current ABI). Userspace can opt out of
> persistent mode (forcing requests to be cancelled when the context is
> closed by process termination or explicitly) by a context parameter. To
> facilitate existing use-cases of disabling hangcheck, if the modparam is
> disabled (i915.enable_hangcheck=0), we disable persistence mode by
> default.  (Note, one of the outcomes for supporting endless mode will be
> the removal of hangchecking, at which point opting into persistent mode
> will be mandatory, or maybe the default perhaps controlled by cgroups.)
> 
> Testcase: igt/gem_ctx_persistence
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
>  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
>  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
>  .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
>  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
>  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
>  include/uapi/drm/i915_drm.h                   |  15 +++
>  10 files changed, 228 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
>  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 1f87c70a2842..561fa2bb3006 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -76,6 +76,7 @@ gt-y += \
>  	gt/intel_breadcrumbs.o \
>  	gt/intel_context.o \
>  	gt/intel_engine_cs.o \
> +	gt/intel_engine_heartbeat.o \
>  	gt/intel_engine_pm.o \
>  	gt/intel_engine_pool.o \
>  	gt/intel_engine_sysfs.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 0ab416887fc2..e832238a72e5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c

<SNIP>

> +static int
> +set_persistence(struct i915_gem_context *ctx,
> +		const struct drm_i915_gem_context_param *args)
> +{
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (args->value) {
> +		i915_gem_context_set_persistence(ctx);
> +		return 0;
> +	}
> +
> +	/* To cancel a context we use "preempt-to-idle" */
> +	if (!(ctx->i915->caps.scheduler &
> I915_SCHEDULER_CAP_PREEMPTION))
> +		return -ENODEV;
> +
> +	i915_gem_context_clear_persistence(ctx);
> +	return 0;
> +}

Hmmn. Given that disabling hangcheck is an explicit operation, and we
already change the default setting, can't we make it a hard requirement
that persistence requires hangcheck? You should not really be able to
opt back in to persistence if hangcheck is disabled. In fact you could
just test for hangcheck when deciding whether to kill the context, and
force-kill if it is off - that way if hangcheck is disabled after a
context starts it will get cleaned up.



* Re: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
  2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
@ 2019-10-02 13:55   ` Bloomfield, Jon
  2019-10-02 14:28     ` Chris Wilson
  0 siblings, 1 reply; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 13:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 4:20 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Ursulin, Tvrtko <tvrtko.ursulin@intel.com>;
> Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
> 
> Replace sampling the engine state every so often with a periodic
> heartbeat request to measure the health of an engine. This is coupled
> with the forced-preemption to allow long running requests to survive so
> long as they do not block other users.
> 
> The heartbeat interval can be adjusted per-engine using,
> 
> 	/sys/class/drm/card0/engine/*/heartbeat_interval_ms

Not true for discrete :-)

Perhaps: /sys/class/drm/card<n>/engine/*/heartbeat_interval_ms
And again in the code below.
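
For illustration, tweaking it from userspace is just a sysfs write; a
quick sketch in C, where the card index and engine name are exactly
the placeholders in question (substitute the right cardN/engine):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* illustrative path only; bump the interval to 2500ms */
		int fd = open("/sys/class/drm/card0/engine/rcs0/heartbeat_interval_ms",
			      O_WRONLY);
		if (fd < 0)
			return 1;
		write(fd, "2500", 4);
		return close(fd);
	}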



* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 13:52   ` Bloomfield, Jon
@ 2019-10-02 14:05     ` Chris Wilson
  2019-10-02 14:23     ` Chris Wilson
  1 sibling, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:05 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-10-02 14:52:32)
> 
> 
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: Wednesday, October 2, 2019 4:20 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> > <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> > Subject: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> > 
> > Normally, we rely on our hangcheck to prevent persistent batches from
> > hogging the GPU. However, if the user disables hangcheck, this mechanism
> > breaks down. Despite our insistence that this is unsafe, the users are
> > equally insistent that they want to use endless batches and will disable
> > the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> > with a softer mechanism, that sends a pulse down the engine to check if
> > it is well. We can use the same preemptive pulse to flush an active
> > persistent context off the GPU upon context close, preventing resources
> > being lost and unkillable requests remaining on the GPU after process
> > termination. To avoid changing the ABI and accidentally breaking
> > existing userspace, we make the persistence of a context explicit and
> > enable it by default (matching current ABI). Userspace can opt out of
> > persistent mode (forcing requests to be cancelled when the context is
> > closed by process termination or explicitly) by a context parameter. To
> > facilitate existing use-cases of disabling hangcheck, if the modparam is
> > disabled (i915.enable_hangcheck=0), we disable persistence mode by
> > default.  (Note, one of the outcomes for supporting endless mode will be
> > the removal of hangchecking, at which point opting into persistent mode
> > will be mandatory, or maybe the default perhaps controlled by cgroups.)
> > 
> > Testcase: igt/gem_ctx_persistence
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> > Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 |   1 +
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
> >  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
> >  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
> >  .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
> >  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
> >  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
> >  include/uapi/drm/i915_drm.h                   |  15 +++
> >  10 files changed, 228 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 1f87c70a2842..561fa2bb3006 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -76,6 +76,7 @@ gt-y += \
> >       gt/intel_breadcrumbs.o \
> >       gt/intel_context.o \
> >       gt/intel_engine_cs.o \
> > +     gt/intel_engine_heartbeat.o \
> >       gt/intel_engine_pm.o \
> >       gt/intel_engine_pool.o \
> >       gt/intel_engine_sysfs.o \
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 0ab416887fc2..e832238a72e5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> 
> <SNIP>
> 
> > +static int
> > +set_persistence(struct i915_gem_context *ctx,
> > +             const struct drm_i915_gem_context_param *args)
> > +{
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     if (args->value) {
> > +             i915_gem_context_set_persistence(ctx);
> > +             return 0;
> > +     }
> > +
> > +     /* To cancel a context we use "preempt-to-idle" */
> > +     if (!(ctx->i915->caps.scheduler &
> > I915_SCHEDULER_CAP_PREEMPTION))
> > +             return -ENODEV;
> > +
> > +     i915_gem_context_clear_persistence(ctx);
> > +     return 0;
> > +}
> 
> Hmmn. Given that disabling hangcheck is an explicit operation, and we already change the default setting, can't we make it a hard requirement that persistence requires hangcheck? You should not really be able to opt back in to persistence if hangcheck is disabled. In fact you could just test for hangcheck when deciding whether to kill the context, and force-kill if it is off - that way if hangcheck is disabled after a context starts it will get cleaned up.

I hear the toctou argument, but I really, really want to avoid coupling
in the modparam as much as possible (I'd rather kill off the modparam
entirely for being too coarse).

But certainly having a check here saying you cannot re-enable
persistence without hangcheck seems valid. Let's see how that looks.
(That takes a bit of lalala to ignore the toctou.)
-Chris

* [PATCH v2] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
  2019-10-02 13:52   ` Bloomfield, Jon
@ 2019-10-02 14:23   ` Chris Wilson
  2019-10-02 14:36     ` Bloomfield, Jon
  1 sibling, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:23 UTC (permalink / raw)
  To: intel-gfx

Normally, we rely on our hangcheck to prevent persistent batches from
hogging the GPU. However, if the user disables hangcheck, this mechanism
breaks down. Despite our insistence that this is unsafe, the users are
equally insistent that they want to use endless batches and will disable
the hangcheck mechanism. We are looking at perhaps replacing hangcheck
with a softer mechanism that sends a pulse down the engine to check if
it is well. We can use the same preemptive pulse to flush an active
persistent context off the GPU upon context close, preventing resources
being lost and unkillable requests remaining on the GPU after process
termination. To avoid changing the ABI and accidentally breaking
existing userspace, we make the persistence of a context explicit and
enable it by default (matching current ABI). Userspace can opt out of
persistent mode (forcing requests to be cancelled when the context is
closed by process termination or explicitly) by a context parameter. To
facilitate existing use-cases of disabling hangcheck, if the modparam is
disabled (i915.enable_hangcheck=0), we disable persistence mode by
default.  (Note, one of the outcomes for supporting endless mode will be
the removal of hangchecking, at which point opting into persistent mode
will be mandatory, or maybe the default, perhaps controlled by cgroups.)

v2: Check for hangchecking at context termination, so that we are not
left with undying contexts from a crafty user.
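
For reference, opting out from userspace is a single SETPARAM call; a
minimal sketch (error handling elided; the drmIoctl() wrapper and
headers from libdrm are assumed):

	#include <drm/i915_drm.h>
	#include <xf86drm.h>

	static void set_nonpersistent(int i915, __u32 ctx_id)
	{
		struct drm_i915_gem_context_param arg = {
			.ctx_id = ctx_id,
			.size = 0, /* must be 0, or setparam returns -EINVAL */
			.param = I915_CONTEXT_PARAM_PERSISTENCE,
			.value = 0, /* cancel outstanding requests on close */
		};

		/* fails with -ENODEV if preempt-to-idle is unsupported */
		drmIoctl(i915, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}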

Testcase: igt/gem_ctx_persistence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 132 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 ++
 .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
 .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
 .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
 drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
 include/uapi/drm/i915_drm.h                   |  15 ++
 10 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1f87c70a2842..561fa2bb3006 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -76,6 +76,7 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
 	gt/intel_engine_pool.o \
 	gt/intel_engine_sysfs.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0ab416887fc2..172fe95d863e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -70,6 +70,7 @@
 #include <drm/i915_drm.h>
 
 #include "gt/intel_lrc_reg.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
 
 #include "i915_gem_context.h"
@@ -375,6 +376,78 @@ void i915_gem_context_release(struct kref *ref)
 		queue_work(i915->wq, &i915->contexts.free_work);
 }
 
+static inline struct i915_gem_engines *
+__context_engines_static(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	intel_engine_mask_t tmp, active, reset;
+	struct intel_gt *gt = &ctx->i915->gt;
+	struct i915_gem_engines_iter it;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	/*
+	 * If we are already banned, it was due to a guilty request causing
+	 * a reset and the entire context being evicted from the GPU.
+	 */
+	if (i915_gem_context_is_banned(ctx))
+		return;
+
+	i915_gem_context_set_banned(ctx);
+
+	/*
+	 * Map the user's engine back to the actual engines; one virtual
+	 * engine will be mapped to multiple engines, and using ctx->engine[]
+	 * the same engine may have multiple instances in the user's map.
+	 * However, we only care about pending requests, so only include
+	 * engines on which there are incomplete requests.
+	 */
+	active = 0;
+	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+		struct dma_fence *fence;
+
+		if (!ce->timeline)
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (!fence)
+			continue;
+
+		engine = to_request(fence)->engine;
+		if (HAS_EXECLISTS(gt->i915))
+			engine = intel_context_inflight(ce);
+		if (engine)
+			active |= engine->mask;
+
+		dma_fence_put(fence);
+	}
+
+	/*
+	 * Send a "high priority pulse" down the engine to cause the
+	 * current request to be momentarily preempted. (If it fails to
+	 * be preempted, it will be reset). As we have marked our context
+	 * as banned, any incomplete request, including any running, will
+	 * be skipped following the preemption.
+	 */
+	reset = 0;
+	for_each_engine_masked(engine, gt->i915, active, tmp)
+		if (intel_engine_pulse(engine))
+			reset |= engine->mask;
+
+	/*
+	 * If we are unable to send a preemptive pulse to bump
+	 * the context from the GPU, we have to resort to a full
+	 * reset. We hope the collateral damage is worth it.
+	 */
+	if (reset)
+		intel_gt_handle_error(gt, reset, 0,
+				      "context closure in %s", ctx->name);
+}
+
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -400,9 +473,47 @@ static void context_close(struct i915_gem_context *ctx)
 	lut_close(ctx);
 
 	mutex_unlock(&ctx->mutex);
+
+	/*
+	 * If the user has disabled hangchecking, we cannot be sure that
+	 * the batches will ever complete after the context is closed, which
+	 * would keep the context and all resources pinned forever. So in this
+	 * case we opt to forcibly kill off all remaining requests on
+	 * context close.
+	 */
+	if (!i915_gem_context_is_persistent(ctx) ||
+	    !i915_modparams.enable_hangcheck)
+		kill_context(ctx);
+
 	i915_gem_context_put(ctx);
 }
 
+static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
+{
+	if (i915_gem_context_is_persistent(ctx) == state)
+		return 0;
+
+	if (state) {
+		/*
+		 * Only contexts that are short-lived [that will expire or be
+		 * reset] are allowed to survive past termination. We require
+		 * hangcheck to ensure that the persistent requests are healthy.
+		 */
+		if (!i915_modparams.enable_hangcheck)
+			return -EINVAL;
+
+		i915_gem_context_set_persistence(ctx);
+	} else {
+		/* To cancel a context we use "preempt-to-idle" */
+		if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+			return -ENODEV;
+
+		i915_gem_context_clear_persistence(ctx);
+	}
+
+	return 0;
+}
+
 static struct i915_gem_context *
 __create_context(struct drm_i915_private *i915)
 {
@@ -439,6 +550,7 @@ __create_context(struct drm_i915_private *i915)
 
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
+	__context_set_persistence(ctx, true /* cgroup hook? */);
 
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
@@ -597,6 +709,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_persistence(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -1724,6 +1837,16 @@ get_engines(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int
+set_persistence(struct i915_gem_context *ctx,
+		const struct drm_i915_gem_context_param *args)
+{
+	if (args->size)
+		return -EINVAL;
+
+	return __context_set_persistence(ctx, args->value);
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1801,6 +1924,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		ret = set_persistence(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2250,6 +2377,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_engines(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_PERSISTENCE:
+		args->size = 0;
+		args->value = i915_gem_context_is_persistent(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..e0f5b6c6a331 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -74,6 +74,21 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..daf1ea5075a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -137,6 +137,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
 #define UCONTEXT_RECOVERABLE		3
+#define UCONTEXT_PERSISTENCE		4
 
 	/**
 	 * @flags: small set of booleans
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..0d1a275ec316 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -23,6 +23,8 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	i915_gem_context_set_persistence(ctx);
+
 	mutex_init(&ctx->engines_mutex);
 	e = default_engines(ctx);
 	if (IS_ERR(e))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..2fc413f9d506
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+	i915_request_add_active_barriers(rq);
+}
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+	struct intel_context *ce = engine->kernel_context;
+	struct i915_request *rq;
+	int err = 0;
+
+	if (!intel_engine_has_preemption(engine))
+		return -ENODEV;
+
+	if (!intel_engine_pm_get_if_awake(engine))
+		return 0;
+
+	if (mutex_lock_interruptible(&ce->timeline->mutex))
+		goto out_rpm;
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+	intel_context_exit(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unlock;
+	}
+
+	rq->flags |= I915_REQUEST_SENTINEL;
+	idle_pulse(engine, rq);
+
+	__i915_request_commit(rq);
+	__i915_request_queue(rq, &attr);
+
+out_unlock:
+	mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+	intel_engine_pm_put(engine);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..b950451b5998
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 8e5e513eddc9..86c0b2280b7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);
 
 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
 	/* Release our exclusive hold on the engine */
diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
index 21037a2e2038..ae8bb3cb627e 100644
--- a/drivers/gpu/drm/i915/i915_priolist_types.h
+++ b/drivers/gpu/drm/i915/i915_priolist_types.h
@@ -39,6 +39,7 @@ enum {
  * active request.
  */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX
 
 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 30c542144016..eb9e704d717a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1565,6 +1565,21 @@ struct drm_i915_gem_context_param {
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is not marked as persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE	0xb
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
2.23.0


* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 13:52   ` Bloomfield, Jon
  2019-10-02 14:05     ` Chris Wilson
@ 2019-10-02 14:23     ` Chris Wilson
  2019-10-02 14:40       ` Bloomfield, Jon
  1 sibling, 1 reply; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:23 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-10-02 14:52:32)
> 
> 
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: Wednesday, October 2, 2019 4:20 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> > <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> > Subject: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> > 
> > Normally, we rely on our hangcheck to prevent persistent batches from
> > hogging the GPU. However, if the user disables hangcheck, this mechanism
> > breaks down. Despite our insistence that this is unsafe, the users are
> > equally insistent that they want to use endless batches and will disable
> > the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> > with a softer mechanism, that sends a pulse down the engine to check if
> > it is well. We can use the same preemptive pulse to flush an active
> > persistent context off the GPU upon context close, preventing resources
> > being lost and unkillable requests remaining on the GPU after process
> > termination. To avoid changing the ABI and accidentally breaking
> > existing userspace, we make the persistence of a context explicit and
> > enable it by default (matching current ABI). Userspace can opt out of
> > persistent mode (forcing requests to be cancelled when the context is
> > closed by process termination or explicitly) by a context parameter. To
> > facilitate existing use-cases of disabling hangcheck, if the modparam is
> > disabled (i915.enable_hangcheck=0), we disable persistence mode by
> > default.  (Note, one of the outcomes for supporting endless mode will be
> > the removal of hangchecking, at which point opting into persistent mode
> > will be mandatory, or maybe the default perhaps controlled by cgroups.)
> > 
> > Testcase: igt/gem_ctx_persistence
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> > Cc: Jon Bloomfield <jon.bloomfield@intel.com>
> > Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile                 |   1 +
> >  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 122 ++++++++++++++++++
> >  drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 +++
> >  .../gpu/drm/i915/gem/i915_gem_context_types.h |   1 +
> >  .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  56 ++++++++
> >  .../gpu/drm/i915/gt/intel_engine_heartbeat.h  |  14 ++
> >  drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   2 +-
> >  drivers/gpu/drm/i915/i915_priolist_types.h    |   1 +
> >  include/uapi/drm/i915_drm.h                   |  15 +++
> >  10 files changed, 228 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 1f87c70a2842..561fa2bb3006 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -76,6 +76,7 @@ gt-y += \
> >       gt/intel_breadcrumbs.o \
> >       gt/intel_context.o \
> >       gt/intel_engine_cs.o \
> > +     gt/intel_engine_heartbeat.o \
> >       gt/intel_engine_pm.o \
> >       gt/intel_engine_pool.o \
> >       gt/intel_engine_sysfs.o \
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 0ab416887fc2..e832238a72e5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> 
> <SNIP>
> 
> > +static int
> > +set_persistence(struct i915_gem_context *ctx,
> > +             const struct drm_i915_gem_context_param *args)
> > +{
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     if (args->value) {
> > +             i915_gem_context_set_persistence(ctx);
> > +             return 0;
> > +     }
> > +
> > +     /* To cancel a context we use "preempt-to-idle" */
> > +     if (!(ctx->i915->caps.scheduler &
> > I915_SCHEDULER_CAP_PREEMPTION))
> > +             return -ENODEV;
> > +
> > +     i915_gem_context_clear_persistence(ctx);
> > +     return 0;
> > +}
> 
> Hmmn. Given that disabling hangcheck is an explicit operation, and we
> already change the default setting, can't we make it a hard requirement
> that persistence requires hangcheck? You should not really be able to
> opt back in to persistence if hangcheck is disabled. In fact you could
> just test for hangcheck when deciding whether to kill the context, and
> force-kill if it is off - that way if hangcheck is disabled after a
> context starts it will get cleaned up.

Just great, now I've got to update the igt to treat i915.enable_hangcheck
as API!
-Chris
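
For illustration, opting out from userspace is then a single SETPARAM
call -- a minimal sketch, assuming the I915_CONTEXT_PARAM_PERSISTENCE
name from the uapi hunk of this patch (not quoted above); the helper
itself is hypothetical:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: opt a context out of persistence so that its
 * outstanding requests are cancelled when the context is closed. */
static int context_clear_persistence(int i915_fd, uint32_t ctx_id)
{
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PERSISTENCE,
		.value = 0, /* 0 = cancel on close, 1 = legacy persistence */
	};

	/* Fails with -ENODEV if the engines cannot preempt-to-idle. */
	return ioctl(i915_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}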

* Re: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
  2019-10-02 13:55   ` Bloomfield, Jon
@ 2019-10-02 14:28     ` Chris Wilson
  0 siblings, 0 replies; 50+ messages in thread
From: Chris Wilson @ 2019-10-02 14:28 UTC (permalink / raw)
  To: Bloomfield, Jon, intel-gfx

Quoting Bloomfield, Jon (2019-10-02 14:55:32)
> > -----Original Message-----
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: Wednesday, October 2, 2019 4:20 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> > <joonas.lahtinen@linux.intel.com>; Ursulin, Tvrtko <tvrtko.ursulin@intel.com>;
> > Bloomfield, Jon <jon.bloomfield@intel.com>
> > Subject: [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats
> > 
> > Replace sampling the engine state every so often with a periodic
> > heartbeat request to measure the health of an engine. This is coupled
> > with the forced-preemption to allow long running requests to survive so
> > long as they do not block other users.
> > 
> > The heartbeat interval can be adjusted per-engine using,
> > 
> >       /sys/class/drm/card0/engine/*/heartbeat_interval_ms
> 
> Not true for discrete :-)
> 
> Perhaps: /sys/class/drm/card<n>/engine/*/heartbeat_interval_ms
> And again in the code below.

I left the extrapolation to the user -- I thought it was clear enough
without saying cardN, card<N>, or card?. Though since it's already using
globbing in one location, perhaps card? is the better fit.
-Chris
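
For readers following along, the heartbeat amounts to a periodic,
self-rearming worker per engine, roughly as below. This is a simplified
sketch only; the engine->heartbeat.work and
engine->props.heartbeat_interval_ms names are assumptions matching the
sysfs interface above, not code quoted from the patch:

/* Simplified sketch of a per-engine heartbeat worker. */
static void heartbeat(struct work_struct *wrk)
{
	struct intel_engine_cs *engine =
		container_of(wrk, typeof(*engine), heartbeat.work.work);
	struct i915_request *rq;

	/* Send an empty pulse down the engine on its kernel context. */
	rq = i915_request_create(engine->kernel_context);
	if (!IS_ERR(rq))
		i915_request_add(rq);

	/*
	 * If the previous pulse has still not completed by the time the
	 * next one fires, its priority is bumped; if even a maximum
	 * priority pulse cannot preempt the running context, the engine
	 * is deemed hung and reset.
	 */
	schedule_delayed_work(&engine->heartbeat.work,
			      msecs_to_jiffies(engine->props.heartbeat_interval_ms));
}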

* Re: [PATCH v2] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
@ 2019-10-02 14:36     ` Bloomfield, Jon
  0 siblings, 0 replies; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 14:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 7:23 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>; Bloomfield, Jon <jon.bloomfield@intel.com>
> Subject: [PATCH v2] drm/i915: Cancel non-persistent contexts on close
> 
> Normally, we rely on our hangcheck to prevent persistent batches from
> hogging the GPU. However, if the user disables hangcheck, this mechanism
> breaks down. Despite our insistence that this is unsafe, the users are
> equally insistent that they want to use endless batches and will disable
> the hangcheck mechanism. We are looking at perhaps replacing hangcheck
> with a softer mechanism, that sends a pulse down the engine to check if
> it is well. We can use the same preemptive pulse to flush an active
> persistent context off the GPU upon context close, preventing resources
> being lost and unkillable requests remaining on the GPU after process
> termination. To avoid changing the ABI and accidentally breaking
> existing userspace, we make the persistence of a context explicit and
> enable it by default (matching current ABI). Userspace can opt out of
> persistent mode (forcing requests to be cancelled when the context is
> closed by process termination or explicitly) by a context parameter. To
> facilitate existing use-cases of disabling hangcheck, if the modparam is
> disabled (i915.enable_hangcheck=0), we disable persistence mode by
> default.  (Note, one of the outcomes for supporting endless mode will be
> the removal of hangchecking, at which point opting into persistent mode
> will be mandatory, or perhaps the default, controlled by cgroups.)
> 
> v2: Check for hangchecking at context termination, so that we are not
> left with undying contexts from a craft user.

s/craft/crafty/ unless we only care about sailors.

Otherwise:
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>

* Re: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
  2019-10-02 14:23     ` Chris Wilson
@ 2019-10-02 14:40       ` Bloomfield, Jon
  0 siblings, 0 replies; 50+ messages in thread
From: Bloomfield, Jon @ 2019-10-02 14:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

> -----Original Message-----
> From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: Wednesday, October 2, 2019 7:24 AM
> To: Bloomfield, Jon <jon.bloomfield@intel.com>; intel-
> gfx@lists.freedesktop.org
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>; Winiarski, Michal
> <michal.winiarski@intel.com>
> Subject: RE: [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close
> 
> Quoting Bloomfield, Jon (2019-10-02 14:52:32)
> >
> >
> >
> > Hmmn. Given that disabling hangcheck is an explicit operation, and we
> already change the default setting, can't we make it a hard requirement that
> persistence requires hangcheck? You should not really be able to opt back in to
> persistence if hangcheck is disabled. In fact you could just test for hangcheck
> when deciding whether to kill the context, and force-kill if it is off - that way if
> hangcheck is disabled after a context starts it will get cleaned up.
> 
> Just great, now I've got to update the igt to treat i915.enable_hangcheck
> as API!
> -Chris
Don't blame me ;-)
I'm in damage limitation mode. I'd prefer we didn't have persistence at all.
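
The check that came out of this exchange (see the v2 posted earlier in
the thread) is small -- roughly the following, where kill_context()
stands in for the preempt-to-idle pulse machinery; a sketch, not the
patch itself:

static void context_close(struct i915_gem_context *ctx)
{
	/*
	 * If the user has disabled hangchecking, nothing will ever clean
	 * up a hostile context, so treat it as non-persistent and cancel
	 * its outstanding requests on close instead of leaving them
	 * undying on the GPU.
	 */
	if (!i915_gem_context_is_persistent(ctx) ||
	    !i915_modparams.enable_hangcheck)
		kill_context(ctx);

	i915_gem_context_put(ctx);
}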

* Re: [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate
  2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
@ 2019-10-02 15:47   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> Since we cannot allocate underneath the vm->mutex (it is used in the
> direct-reclaim paths), we need to shift the allocations off into a
> mutexless worker with fence recursion prevention. To know when we need
> this protection, we mark up the address spaces that do allocate before
> insertion. In the future, we may wish to extend the async bind scheme to
> more than just allocations.
> 
> v2: s/vm->bind_alloc/vm->bind_async_flags/
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
>   drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++
>   2 files changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ad9eb2d68f3f..8eba63ecdb03 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1502,6 +1502,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>   			goto err_free_pd;
>   	}
>   
> +	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>   	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>   	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
>   	ppgtt->vm.clear_range = gen8_ppgtt_clear;
> @@ -1950,6 +1951,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>   	ppgtt_init(&ppgtt->base, &i915->gt);
>   	ppgtt->base.vm.top = 1;
>   
> +	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>   	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>   	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
>   	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
> @@ -2581,6 +2583,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
>   		goto err_ppgtt;
>   
>   	ggtt->alias = ppgtt;
> +	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
>   
>   	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
>   	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index bbdc735466c1..3502b9c85a8e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -305,6 +305,8 @@ struct i915_address_space {
>   	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
>   	u64 reserved;		/* size addr space reserved */
>   
> +	unsigned int bind_async_flags;
> +
>   	bool closed;
>   
>   	struct mutex mutex; /* protects vma and our lists */
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
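
The consumer of the new flag lives in the vma code rather than in this
hunk; the idea reduces to the following sketch, where queue_async_bind()
is a hypothetical stand-in for the mutexless worker described in the
commit message:

static int bind_vma(struct i915_vma *vma, u32 bind_flags)
{
	/*
	 * Binding may need to allocate page directories, which we cannot
	 * do under vm->mutex (it is used in the direct-reclaim paths),
	 * so shift such binds off to the async worker.
	 */
	if (vma->vm->bind_async_flags & bind_flags)
		return queue_async_bind(vma, bind_flags);

	return vma->vm->vma_ops.bind_vma(vma, vma->obj->cache_level,
					 bind_flags);
}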

* Re: [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex
  2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
@ 2019-10-02 15:49   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> Forgo the struct_mutex serialisation for i915_active, and interpose its
> own mutex handling for active/retire.
> 
> This is a multi-layered sleight-of-hand. First, we had to ensure that no
> active/retire callbacks accidentally inverted the mutex ordering rules,
> nor assumed that they were themselves serialised by struct_mutex. More
> challenging, though, is the rule for updating elements of the active
> rbtree. Instead of the whole i915_active now being serialised by
> struct_mutex, allocations/rotations of the tree are serialised by the
> i915_active.mutex and individual nodes are serialised by the caller
> using the i915_timeline.mutex (we need to use nested spinlocks to
> interact with the dma_fence callback lists).
> 
> The pain point here is that instead of a single mutex around execbuf, we
> now have to take a mutex per active tracker (one for each vma, context,
> etc) and a couple of spinlocks for each fence update. The improvement in
> fine-grained locking, allowing for multiple concurrent clients
> (eventually!), should be worth it in typical loads.
> 
> v2: Add some comments that barely elucidate anything :(

Here's one r-b which is barely worth anything as well: ;)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
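
In API terms, a typical user after this patch looks roughly like the
condensed sketch below, drawn from the conversions in the diff (init
shown inline for brevity; it is normally done once at creation):

static int track_request(struct i915_active *ref, struct i915_request *rq)
{
	int err;

	/* No struct drm_i915_private argument any more. */
	i915_active_init(ref, NULL /* active */, NULL /* retire */);

	/* First use takes ref->mutex; subsequent uses are atomic. */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	/* Record rq as the last activity, under its timeline mutex. */
	err = i915_active_add_request(ref, rq);

	i915_active_release(ref);
	return err;
}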

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../gpu/drm/i915/display/intel_frontbuffer.c  |   2 +-
>   drivers/gpu/drm/i915/display/intel_overlay.c  |   3 +-
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   |   4 +-
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |   9 +-
>   drivers/gpu/drm/i915/gt/intel_context.c       |   4 +-
>   drivers/gpu/drm/i915/gt/intel_engine_pool.c   |   2 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c         |  10 +-
>   drivers/gpu/drm/i915/gt/intel_timeline.c      |   7 +-
>   .../gpu/drm/i915/gt/intel_timeline_types.h    |   9 +-
>   drivers/gpu/drm/i915/gt/selftest_context.c    |  16 +-
>   drivers/gpu/drm/i915/gt/selftest_lrc.c        |  10 +-
>   .../gpu/drm/i915/gt/selftests/mock_timeline.c |   2 +-
>   drivers/gpu/drm/i915/gvt/scheduler.c          |   3 -
>   drivers/gpu/drm/i915/i915_active.c            | 322 +++++++++---------
>   drivers/gpu/drm/i915/i915_active.h            | 319 ++++-------------
>   drivers/gpu/drm/i915/i915_active_types.h      |  23 +-
>   drivers/gpu/drm/i915/i915_gem.c               |  42 ++-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c         |   4 +-
>   drivers/gpu/drm/i915/i915_request.c           |  38 +--
>   drivers/gpu/drm/i915/i915_request.h           |   1 -
>   drivers/gpu/drm/i915/i915_vma.c               |   4 +-
>   drivers/gpu/drm/i915/selftests/i915_active.c  |  32 +-
>   24 files changed, 298 insertions(+), 572 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> index 6428b8dd70d3..84b164f31895 100644
> --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
> @@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
>   	front->obj = obj;
>   	kref_init(&front->ref);
>   	atomic_set(&front->bits, 0);
> -	i915_active_init(i915, &front->write,
> +	i915_active_init(&front->write,
>   			 frontbuffer_active,
>   			 i915_active_may_sleep(frontbuffer_retire));
>   
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index 3f4ac1ee7668..e12e1a753af0 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
>   	overlay->contrast = 75;
>   	overlay->saturation = 146;
>   
> -	i915_active_init(dev_priv,
> -			 &overlay->last_flip,
> +	i915_active_init(&overlay->last_flip,
>   			 NULL, intel_overlay_last_flip_retire);
>   
>   	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 4cd7d2ecf1d5..9d85aab68d34 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   				void (*task)(void *data),
>   				void *data)
>   {
> -	struct drm_i915_private *i915 = ctx->i915;
>   	struct context_barrier_task *cb;
>   	struct i915_gem_engines_iter it;
>   	struct intel_context *ce;
>   	int err = 0;
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
>   	GEM_BUG_ON(!task);
>   
>   	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
>   	if (!cb)
>   		return -ENOMEM;
>   
> -	i915_active_init(i915, &cb->base, NULL, cb_retire);
> +	i915_active_init(&cb->base, NULL, cb_retire);
>   	err = i915_active_acquire(&cb->base);
>   	if (err) {
>   		kfree(cb);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index e1aab2fd1cd9..c00b4f077f9e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -8,6 +8,7 @@
>   #define __I915_GEM_OBJECT_TYPES_H__
>   
>   #include <drm/drm_gem.h>
> +#include <uapi/drm/i915_drm.h>
>   
>   #include "i915_active.h"
>   #include "i915_selftest.h"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index cca192ecff8b..0a4115c6c275 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
>   	struct llist_node *node, *next;
>   
>   	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
> -		struct i915_active_request *active =
> +		struct dma_fence_cb *cb =
>   			container_of((struct list_head *)node,
> -				     typeof(*active), link);
> +				     typeof(*cb), node);
>   
> -		INIT_LIST_HEAD(&active->link);
> -		RCU_INIT_POINTER(active->request, NULL);
> -
> -		active->retire(active, NULL);
> +		cb->func(NULL, cb);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 06fabdf205cf..35a40c2820a2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,
>   
>   	mutex_init(&ce->pin_mutex);
>   
> -	i915_active_init(ctx->i915, &ce->active,
> +	i915_active_init(&ce->active,
>   			 __intel_context_active, __intel_context_retire);
>   }
>   
> @@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>   			return err;
>   
>   		/* Queue this switch after current activity by this context. */
> -		err = i915_active_request_set(&tl->last_request, rq);
> +		err = i915_active_fence_set(&tl->last_request, rq);
>   		mutex_unlock(&tl->mutex);
>   		if (err)
>   			return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> index 81fab101fdb4..3cdbd5f8b5be 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
> @@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
>   		return ERR_PTR(-ENOMEM);
>   
>   	node->pool = pool;
> -	i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
> +	i915_active_init(&node->active, pool_active, pool_retire);
>   
>   	obj = i915_gem_object_create_internal(engine->i915, sz);
>   	if (IS_ERR(obj)) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index e189897e8797..055496f0825f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
>   	 */
>   	spin_lock_irqsave(&timelines->lock, flags);
>   	list_for_each_entry(tl, &timelines->active_list, link) {
> -		struct i915_request *rq;
> +		struct dma_fence *fence;
>   
> -		rq = i915_active_request_get_unlocked(&tl->last_request);
> -		if (!rq)
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (!fence)
>   			continue;
>   
>   		spin_unlock_irqrestore(&timelines->lock, flags);
> @@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
>   		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
>   		 * in the worst case.
>   		 */
> -		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
> -		i915_request_put(rq);
> +		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
> +		dma_fence_put(fence);
>   
>   		/* Restart iteration after droping lock */
>   		spin_lock_irqsave(&timelines->lock, flags);
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 653f60e78392..0f959694303c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
>   	cl->hwsp = hwsp;
>   	cl->vaddr = page_pack_bits(vaddr, cacheline);
>   
> -	i915_active_init(hwsp->gt->i915, &cl->active,
> -			 __cacheline_active, __cacheline_retire);
> +	i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
>   
>   	return cl;
>   }
> @@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
>   
>   	mutex_init(&timeline->mutex);
>   
> -	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
> +	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
>   	INIT_LIST_HEAD(&timeline->requests);
>   
>   	i915_syncmap_init(&timeline->sync);
> @@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
>   	 * free it after the current request is retired, which ensures that
>   	 * all writes into the cacheline from previous requests are complete.
>   	 */
> -	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
> +	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
>   	if (err)
>   		goto err_cacheline;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index c668c4c50e75..98d9ee166379 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -58,12 +58,13 @@ struct intel_timeline {
>   	 */
>   	struct list_head requests;
>   
> -	/* Contains an RCU guarded pointer to the last request. No reference is
> +	/*
> +	 * Contains an RCU guarded pointer to the last request. No reference is
>   	 * held to the request, users must carefully acquire a reference to
> -	 * the request using i915_active_request_get_request_rcu(), or hold the
> -	 * struct_mutex.
> +	 * the request using i915_active_fence_get(), or manage the RCU
> +	 * protection themselves (cf the i915_active_fence API).
>   	 */
> -	struct i915_active_request last_request;
> +	struct i915_active_fence last_request;
>   
>   	/**
>   	 * We track the most recent seqno that we wait on in every context so
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> index 4ce1e25433d2..e6bcbe7ab5e1 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_context.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce)
>   
>   	mutex_lock(&tl->mutex);
>   	do {
> -		struct i915_request *rq;
> +		struct dma_fence *fence;
>   		long timeout;
>   
> -		rcu_read_lock();
> -		rq = rcu_dereference(tl->last_request.request);
> -		if (rq)
> -			rq = i915_request_get_rcu(rq);
> -		rcu_read_unlock();
> -		if (!rq)
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (!fence)
>   			break;
>   
> -		timeout = i915_request_wait(rq, 0, HZ / 10);
> +		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
>   		if (timeout < 0)
>   			err = timeout;
>   		else
> -			i915_request_retire_upto(rq);
> +			i915_request_retire_upto(to_request(fence));
>   
> -		i915_request_put(rq);
> +		dma_fence_put(fence);
>   	} while (!err);
>   	mutex_unlock(&tl->mutex);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 2da4fa5149cb..01d5a26b9aa7 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1176,9 +1176,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
>   	if (!rq)
>   		return NULL;
>   
> -	INIT_LIST_HEAD(&rq->active_list);
>   	rq->engine = engine;
>   
> +	spin_lock_init(&rq->lock);
> +	INIT_LIST_HEAD(&rq->fence.cb_list);
> +	rq->fence.lock = &rq->lock;
> +	rq->fence.ops = &i915_fence_ops;
> +
>   	i915_sched_node_init(&rq->sched);
>   
>   	/* mark this request as permanently incomplete */
> @@ -1271,8 +1275,8 @@ static int live_suppress_wait_preempt(void *arg)
>   				}
>   
>   				/* Disable NEWCLIENT promotion */
> -				__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
> -							  dummy);
> +				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
> +							&dummy->fence);
>   				i915_request_add(rq[i]);
>   			}
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> index 598170efcaf6..2a77c051f36a 100644
> --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
>   
>   	mutex_init(&timeline->mutex);
>   
> -	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
> +	INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
>   	INIT_LIST_HEAD(&timeline->requests);
>   
>   	i915_syncmap_init(&timeline->sync);
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 6c79d16b381e..03f567084548 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
>   {
>   	struct intel_vgpu *vgpu = workload->vgpu;
>   	struct intel_vgpu_submission *s = &vgpu->submission;
> -	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
>   	struct i915_request *rq;
>   
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
>   	if (workload->req)
>   		return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 7ca066688b98..023652ded4be 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -12,8 +12,6 @@
>   #include "i915_active.h"
>   #include "i915_globals.h"
>   
> -#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
> -
>   /*
>    * Active refs memory management
>    *
> @@ -27,35 +25,35 @@ static struct i915_global_active {
>   } global;
>   
>   struct active_node {
> -	struct i915_active_request base;
> +	struct i915_active_fence base;
>   	struct i915_active *ref;
>   	struct rb_node node;
>   	u64 timeline;
>   };
>   
>   static inline struct active_node *
> -node_from_active(struct i915_active_request *active)
> +node_from_active(struct i915_active_fence *active)
>   {
>   	return container_of(active, struct active_node, base);
>   }
>   
>   #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)
>   
> -static inline bool is_barrier(const struct i915_active_request *active)
> +static inline bool is_barrier(const struct i915_active_fence *active)
>   {
> -	return IS_ERR(rcu_access_pointer(active->request));
> +	return IS_ERR(rcu_access_pointer(active->fence));
>   }
>   
>   static inline struct llist_node *barrier_to_ll(struct active_node *node)
>   {
>   	GEM_BUG_ON(!is_barrier(&node->base));
> -	return (struct llist_node *)&node->base.link;
> +	return (struct llist_node *)&node->base.cb.node;
>   }
>   
>   static inline struct intel_engine_cs *
>   __barrier_to_engine(struct active_node *node)
>   {
> -	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
> +	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
>   }
>   
>   static inline struct intel_engine_cs *
> @@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node)
>   static inline struct active_node *barrier_from_ll(struct llist_node *x)
>   {
>   	return container_of((struct list_head *)x,
> -			    struct active_node, base.link);
> +			    struct active_node, base.cb.node);
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)
> @@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref)
>   	if (!retire)
>   		return;
>   
> -	GEM_BUG_ON(rcu_access_pointer(ref->excl));
> +	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
>   	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
> -		GEM_BUG_ON(i915_active_request_isset(&it->base));
> +		GEM_BUG_ON(i915_active_fence_isset(&it->base));
>   		kmem_cache_free(global.slab_cache, it);
>   	}
>   
>   	/* After the final retire, the entire struct may be freed */
>   	if (ref->retire)
>   		ref->retire(ref);
> +
> +	/* ... except if you wait on it, you must manage your own references! */
> +	wake_up_var(ref);
>   }
>   
>   static void
> @@ -189,12 +190,20 @@ active_retire(struct i915_active *ref)
>   }
>   
>   static void
> -node_retire(struct i915_active_request *base, struct i915_request *rq)
> +node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
>   {
> -	active_retire(node_from_active(base)->ref);
> +	i915_active_fence_cb(fence, cb);
> +	active_retire(container_of(cb, struct active_node, base.cb)->ref);
>   }
>   
> -static struct i915_active_request *
> +static void
> +excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
> +{
> +	i915_active_fence_cb(fence, cb);
> +	active_retire(container_of(cb, struct i915_active, excl.cb));
> +}
> +
> +static struct i915_active_fence *
>   active_instance(struct i915_active *ref, struct intel_timeline *tl)
>   {
>   	struct active_node *node, *prealloc;
> @@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
>   	}
>   
>   	node = prealloc;
> -	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
> +	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
>   	node->ref = ref;
>   	node->timeline = idx;
>   
> @@ -253,8 +262,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
>   	return &node->base;
>   }
>   
> -void __i915_active_init(struct drm_i915_private *i915,
> -			struct i915_active *ref,
> +void __i915_active_init(struct i915_active *ref,
>   			int (*active)(struct i915_active *ref),
>   			void (*retire)(struct i915_active *ref),
>   			struct lock_class_key *key)
> @@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915,
>   
>   	debug_active_init(ref);
>   
> -	ref->i915 = i915;
>   	ref->flags = 0;
>   	ref->active = active;
>   	ref->retire = ptr_unpack_bits(retire, &bits, 2);
>   	if (bits & I915_ACTIVE_MAY_SLEEP)
>   		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
>   
> -	ref->excl = NULL;
>   	ref->tree = RB_ROOT;
>   	ref->cache = NULL;
>   	init_llist_head(&ref->preallocated_barriers);
>   	atomic_set(&ref->count, 0);
>   	__mutex_init(&ref->mutex, "i915_active", key);
> +	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
>   	INIT_WORK(&ref->work, active_work);
>   }
>   
> @@ -329,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
>   
>   int i915_active_ref(struct i915_active *ref,
>   		    struct intel_timeline *tl,
> -		    struct i915_request *rq)
> +		    struct dma_fence *fence)
>   {
> -	struct i915_active_request *active;
> +	struct i915_active_fence *active;
>   	int err;
>   
>   	lockdep_assert_held(&tl->mutex);
> @@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref,
>   		 * request that we want to emit on the kernel_context.
>   		 */
>   		__active_del_barrier(ref, node_from_active(active));
> -		RCU_INIT_POINTER(active->request, NULL);
> -		INIT_LIST_HEAD(&active->link);
> -	} else {
> -		if (!i915_active_request_isset(active))
> -			atomic_inc(&ref->count);
> +		RCU_INIT_POINTER(active->fence, NULL);
> +		atomic_dec(&ref->count);
>   	}
> -	GEM_BUG_ON(!atomic_read(&ref->count));
> -	__i915_active_request_set(active, rq);
> +	if (!__i915_active_fence_set(active, fence))
> +		atomic_inc(&ref->count);
>   
>   out:
>   	i915_active_release(ref);
>   	return err;
>   }
>   
> -static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> -{
> -	struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb);
> -
> -	RCU_INIT_POINTER(ref->excl, NULL);
> -	dma_fence_put(f);
> -
> -	active_retire(ref);
> -}
> -
>   void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
>   {
>   	/* We expect the caller to manage the exclusive timeline ordering */
>   	GEM_BUG_ON(i915_active_is_idle(ref));
>   
> -	dma_fence_get(f);
> -
> -	rcu_read_lock();
> -	if (rcu_access_pointer(ref->excl)) {
> -		struct dma_fence *old;
> -
> -		old = dma_fence_get_rcu_safe(&ref->excl);
> -		if (old) {
> -			if (dma_fence_remove_callback(old, &ref->excl_cb))
> -				atomic_dec(&ref->count);
> -			dma_fence_put(old);
> -		}
> -	}
> -	rcu_read_unlock();
> -
> -	atomic_inc(&ref->count);
> -	rcu_assign_pointer(ref->excl, f);
> +	/*
> +	 * As we don't know which mutex the caller is using, we told a small
> +	 * lie to the debug code that it is using the i915_active.mutex;
> +	 * and now we must stick to that lie.
> +	 */
> +	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
> +	if (!__i915_active_fence_set(&ref->excl, f))
> +		atomic_inc(&ref->count);
> +	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
> +}
>   
> -	if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) {
> -		RCU_INIT_POINTER(ref->excl, NULL);
> -		atomic_dec(&ref->count);
> -		dma_fence_put(f);
> -	}
> +bool i915_active_acquire_if_busy(struct i915_active *ref)
> +{
> +	debug_active_assert(ref);
> +	return atomic_add_unless(&ref->count, 1, 0);
>   }
>   
>   int i915_active_acquire(struct i915_active *ref)
>   {
>   	int err;
>   
> -	debug_active_assert(ref);
> -	if (atomic_add_unless(&ref->count, 1, 0))
> +	if (i915_active_acquire_if_busy(ref))
>   		return 0;
>   
>   	err = mutex_lock_interruptible(&ref->mutex);
> @@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref)
>   	active_retire(ref);
>   }
>   
> -static void __active_ungrab(struct i915_active *ref)
> -{
> -	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
> -}
> -
> -bool i915_active_trygrab(struct i915_active *ref)
> +static void enable_signaling(struct i915_active_fence *active)
>   {
> -	debug_active_assert(ref);
> -
> -	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
> -		return false;
> -
> -	if (!atomic_add_unless(&ref->count, 1, 0)) {
> -		__active_ungrab(ref);
> -		return false;
> -	}
> +	struct dma_fence *fence;
>   
> -	return true;
> -}
> -
> -void i915_active_ungrab(struct i915_active *ref)
> -{
> -	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
> -
> -	active_retire(ref);
> -	__active_ungrab(ref);
> -}
> -
> -static int excl_wait(struct i915_active *ref)
> -{
> -	struct dma_fence *old;
> -	int err = 0;
> -
> -	if (!rcu_access_pointer(ref->excl))
> -		return 0;
> -
> -	rcu_read_lock();
> -	old = dma_fence_get_rcu_safe(&ref->excl);
> -	rcu_read_unlock();
> -	if (old) {
> -		err = dma_fence_wait(old, true);
> -		dma_fence_put(old);
> -	}
> +	fence = i915_active_fence_get(active);
> +	if (!fence)
> +		return;
>   
> -	return err;
> +	dma_fence_enable_sw_signaling(fence);
> +	dma_fence_put(fence);
>   }
>   
>   int i915_active_wait(struct i915_active *ref)
>   {
>   	struct active_node *it, *n;
> -	int err;
> +	int err = 0;
>   
>   	might_sleep();
> -	might_lock(&ref->mutex);
> -
> -	if (i915_active_is_idle(ref))
> -		return 0;
> -
> -	err = mutex_lock_interruptible(&ref->mutex);
> -	if (err)
> -		return err;
>   
> -	if (!atomic_add_unless(&ref->count, 1, 0)) {
> -		mutex_unlock(&ref->mutex);
> +	if (!i915_active_acquire_if_busy(ref))
>   		return 0;
> -	}
> -
> -	err = excl_wait(ref);
> -	if (err)
> -		goto out;
>   
> +	/* Flush lazy signals */
> +	enable_signaling(&ref->excl);
>   	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
> -		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
> -			err = -EBUSY;
> -			break;
> -		}
> +		if (is_barrier(&it->base)) /* unconnected idle barrier */
> +			continue;
>   
> -		err = i915_active_request_retire(&it->base, BKL(ref));
> -		if (err)
> -			break;
> +		enable_signaling(&it->base);
>   	}
> +	/* Any fence added after the wait begins will not be auto-signaled */
>   
> -out:
> -	__active_retire(ref);
> +	i915_active_release(ref);
>   	if (err)
>   		return err;
>   
> -	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
> +	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
>   		return -EINTR;
>   
> -	flush_work(&ref->work);
> -	if (!i915_active_is_idle(ref))
> -		return -EBUSY;
> -
>   	return 0;
>   }
>   
> -int i915_request_await_active_request(struct i915_request *rq,
> -				      struct i915_active_request *active)
> -{
> -	struct i915_request *barrier =
> -		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
> -
> -	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
> -}
> -
>   int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
>   {
>   	int err = 0;
>   
> -	if (rcu_access_pointer(ref->excl)) {
> +	if (rcu_access_pointer(ref->excl.fence)) {
>   		struct dma_fence *fence;
>   
>   		rcu_read_lock();
> -		fence = dma_fence_get_rcu_safe(&ref->excl);
> +		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
>   		rcu_read_unlock();
>   		if (fence) {
>   			err = i915_request_await_dma_fence(rq, fence);
> @@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref)
>   
>   static inline bool is_idle_barrier(struct active_node *node, u64 idx)
>   {
> -	return node->timeline == idx && !i915_active_request_isset(&node->base);
> +	return node->timeline == idx && !i915_active_fence_isset(&node->base);
>   }
>   
>   static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
> @@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   			node->base.lock =
>   				&engine->kernel_context->timeline->mutex;
>   #endif
> -			RCU_INIT_POINTER(node->base.request, NULL);
> -			node->base.retire = node_retire;
> +			RCU_INIT_POINTER(node->base.fence, NULL);
> +			node->base.cb.func = node_retire;
>   			node->timeline = idx;
>   			node->ref = ref;
>   		}
>   
> -		if (!i915_active_request_isset(&node->base)) {
> +		if (!i915_active_fence_isset(&node->base)) {
>   			/*
>   			 * Mark this as being *our* unconnected proto-node.
>   			 *
> @@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   			 * and then we can use the rb_node and list pointers
>   			 * for our tracking of the pending barrier.
>   			 */
> -			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
> -			node->base.link.prev = (void *)engine;
> +			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
> +			node->base.cb.node.prev = (void *)engine;
>   			atomic_inc(&ref->count);
>   		}
>   
> @@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq)
>   {
>   	struct intel_engine_cs *engine = rq->engine;
>   	struct llist_node *node, *next;
> +	unsigned long flags;
>   
>   	GEM_BUG_ON(intel_engine_is_virtual(engine));
>   	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
>   
> +	node = llist_del_all(&engine->barrier_tasks);
> +	if (!node)
> +		return;
>   	/*
>   	 * Attach the list of proto-fences to the in-flight request such
>   	 * that the parent i915_active will be released when this request
>   	 * is retired.
>   	 */
> -	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
> -		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
> +	spin_lock_irqsave(&rq->lock, flags);
> +	llist_for_each_safe(node, next, node) {
> +		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
>   		smp_wmb(); /* serialise with reuse_idle_barrier */
> -		list_add_tail((struct list_head *)node, &rq->active_list);
> +		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
> +	}
> +	spin_unlock_irqrestore(&rq->lock, flags);
> +}
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +#define active_is_held(active) lockdep_is_held((active)->lock)
> +#else
> +#define active_is_held(active) true
> +#endif
> +
> +/*
> + * __i915_active_fence_set: Update the last active fence along its timeline
> + * @active: the active tracker
> + * @fence: the new fence (under construction)
> + *
> + * Records the new @fence as the last active fence along its timeline in
> + * this active tracker, moving the tracking callbacks from the previous
> + * fence onto this one. Returns the previous fence (if not already completed),
> + * which the caller must ensure is executed before the new fence. To ensure
> + * that the order of fences within the timeline of the i915_active_fence is
> + * maintained, it must be locked by the caller.
> + */
> +struct dma_fence *
> +__i915_active_fence_set(struct i915_active_fence *active,
> +			struct dma_fence *fence)
> +{
> +	struct dma_fence *prev;
> +	unsigned long flags;
> +
> +	/* NB: must be serialised by an outer timeline mutex (active->lock) */
> +	spin_lock_irqsave(fence->lock, flags);
> +	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
> +
> +	prev = rcu_dereference_protected(active->fence, active_is_held(active));
> +	if (prev) {
> +		GEM_BUG_ON(prev == fence);
> +		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
> +		__list_del_entry(&active->cb.node);
> +		spin_unlock(prev->lock); /* serialise with prev->cb_list */
> +
> +		/*
> +		 * active->fence is reset by the callback from inside
> +		 * interrupt context. We need to serialise our list
> +		 * manipulation with the fence->lock to prevent the prev
> +		 * being lost inside an interrupt (it can't be replaced as
> +		 * no other caller is allowed to enter __i915_active_fence_set
> +		 * as we hold the timeline lock). After serialising with
> +		 * the callback, we need to double check which ran first,
> +		 * our list_del() [decoupling prev from the callback] or
> +		 * the callback...
> +		 */
> +		prev = rcu_access_pointer(active->fence);
>   	}
> +
> +	rcu_assign_pointer(active->fence, fence);
> +	list_add_tail(&active->cb.node, &fence->cb_list);
> +
> +	spin_unlock_irqrestore(fence->lock, flags);
> +
> +	return prev;
>   }
>   
> -int i915_active_request_set(struct i915_active_request *active,
> -			    struct i915_request *rq)
> +int i915_active_fence_set(struct i915_active_fence *active,
> +			  struct i915_request *rq)
>   {
> -	int err;
> +	struct dma_fence *fence;
> +	int err = 0;
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   	lockdep_assert_held(active->lock);
>   #endif
>   
> -	/* Must maintain ordering wrt previous active requests */
> -	err = i915_request_await_active_request(rq, active);
> -	if (err)
> -		return err;
> +	/* Must maintain timeline ordering wrt previous active requests */
> +	rcu_read_lock();
> +	fence = __i915_active_fence_set(active, &rq->fence);
> +	if (fence) /* but the previous fence may not belong to that timeline! */
> +		fence = dma_fence_get_rcu(fence);
> +	rcu_read_unlock();
> +	if (fence) {
> +		err = i915_request_await_dma_fence(rq, fence);
> +		dma_fence_put(fence);
> +	}
>   
> -	__i915_active_request_set(active, rq);
> -	return 0;
> +	return err;
>   }
>   
> -void i915_active_retire_noop(struct i915_active_request *active,
> -			     struct i915_request *request)
> +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
>   {
> -	/* Space left intentionally blank */
> +	i915_active_fence_cb(fence, cb);
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 90034f61b7c2..4f52fe6146d2 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -12,6 +12,10 @@
>   #include "i915_active_types.h"
>   #include "i915_request.h"
>   
> +struct i915_request;
> +struct intel_engine_cs;
> +struct intel_timeline;
> +
>   /*
> >    * We treat requests as fences. This is not to be confused with our
>    * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
> @@ -28,308 +32,108 @@
>    * write access so that we can perform concurrent read operations between
>    * the CPU and GPU engines, as well as waiting for all rendering to
>    * complete, or waiting for the last GPU user of a "fence register". The
> - * object then embeds a #i915_active_request to track the most recent (in
> + * object then embeds a #i915_active_fence to track the most recent (in
>    * retirement order) request relevant for the desired mode of access.
> - * The #i915_active_request is updated with i915_active_request_set() to
> + * The #i915_active_fence is updated with i915_active_fence_set() to
>    * track the most recent fence request, typically this is done as part of
>    * i915_vma_move_to_active().
>    *
> - * When the #i915_active_request completes (is retired), it will
> + * When the #i915_active_fence completes (is retired), it will
>    * signal its completion to the owner through a callback as well as mark
> - * itself as idle (i915_active_request.request == NULL). The owner
> > + * itself as idle (i915_active_fence.fence == NULL). The owner
>    * can then perform any action, such as delayed freeing of an active
>    * resource including itself.
>    */
>   
> -void i915_active_retire_noop(struct i915_active_request *active,
> -			     struct i915_request *request);
> +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
>   
>   /**
> - * i915_active_request_init - prepares the activity tracker for use
> + * __i915_active_fence_init - prepares the activity tracker for use
>    * @active - the active tracker
> - * @rq - initial request to track, can be NULL
> + * @fence - initial fence to track, can be NULL
>    * @func - a callback when then the tracker is retired (becomes idle),
>    *         can be NULL
>    *
> - * i915_active_request_init() prepares the embedded @active struct for use as
> - * an activity tracker, that is for tracking the last known active request
> - * associated with it. When the last request becomes idle, when it is retired
> > + * __i915_active_fence_init() prepares the embedded @active struct for use as
> + * an activity tracker, that is for tracking the last known active fence
> + * associated with it. When the last fence becomes idle, when it is retired
>    * after completion, the optional callback @func is invoked.
>    */
>   static inline void
> -i915_active_request_init(struct i915_active_request *active,
> +__i915_active_fence_init(struct i915_active_fence *active,
>   			 struct mutex *lock,
> -			 struct i915_request *rq,
> -			 i915_active_retire_fn retire)
> +			 void *fence,
> +			 dma_fence_func_t fn)
>   {
> -	RCU_INIT_POINTER(active->request, rq);
> -	INIT_LIST_HEAD(&active->link);
> -	active->retire = retire ?: i915_active_retire_noop;
> +	RCU_INIT_POINTER(active->fence, fence);
> +	active->cb.func = fn ?: i915_active_noop;
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   	active->lock = lock;
>   #endif
>   }
>   
> -#define INIT_ACTIVE_REQUEST(name, lock) \
> -	i915_active_request_init((name), (lock), NULL, NULL)
> -
> -/**
> - * i915_active_request_set - updates the tracker to watch the current request
> - * @active - the active tracker
> - * @request - the request to watch
> - *
> - * __i915_active_request_set() watches the given @request for completion. Whilst
> - * that @request is busy, the @active reports busy. When that @request is
> - * retired, the @active tracker is updated to report idle.
> - */
> -static inline void
> -__i915_active_request_set(struct i915_active_request *active,
> -			  struct i915_request *request)
> -{
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> -	lockdep_assert_held(active->lock);
> -#endif
> -	list_move(&active->link, &request->active_list);
> -	rcu_assign_pointer(active->request, request);
> -}
> +#define INIT_ACTIVE_FENCE(A, LOCK) \
> +	__i915_active_fence_init((A), (LOCK), NULL, NULL)
>   
> -int __must_check
> -i915_active_request_set(struct i915_active_request *active,
> -			struct i915_request *rq);
> +struct dma_fence *
> +__i915_active_fence_set(struct i915_active_fence *active,
> +			struct dma_fence *fence);
>   
>   /**
> - * i915_active_request_raw - return the active request
> + * i915_active_fence_set - updates the tracker to watch the current fence
>    * @active - the active tracker
> + * @rq - the request to watch
>    *
> - * i915_active_request_raw() returns the current request being tracked, or NULL.
> - * It does not obtain a reference on the request for the caller, so the caller
> - * must hold struct_mutex.
> + * i915_active_fence_set() watches the given @rq for completion. While
> + * that @rq is busy, the @active reports busy. When that @rq is signaled
> + * (or else retired) the @active tracker is updated to report idle.
>    */
> -static inline struct i915_request *
> -i915_active_request_raw(const struct i915_active_request *active,
> -			struct mutex *mutex)
> -{
> -	return rcu_dereference_protected(active->request,
> -					 lockdep_is_held(mutex));
> -}
> -
> -/**
> - * i915_active_request_peek - report the active request being monitored
> - * @active - the active tracker
> - *
> - * i915_active_request_peek() returns the current request being tracked if
> - * still active, or NULL. It does not obtain a reference on the request
> - * for the caller, so the caller must hold struct_mutex.
> - */
> -static inline struct i915_request *
> -i915_active_request_peek(const struct i915_active_request *active,
> -			 struct mutex *mutex)
> -{
> -	struct i915_request *request;
> -
> -	request = i915_active_request_raw(active, mutex);
> -	if (!request || i915_request_completed(request))
> -		return NULL;
> -
> -	return request;
> -}
> -
> -/**
> - * i915_active_request_get - return a reference to the active request
> - * @active - the active tracker
> - *
> - * i915_active_request_get() returns a reference to the active request, or NULL
> - * if the active tracker is idle. The caller must hold struct_mutex.
> - */
> -static inline struct i915_request *
> -i915_active_request_get(const struct i915_active_request *active,
> -			struct mutex *mutex)
> -{
> -	return i915_request_get(i915_active_request_peek(active, mutex));
> -}
> -
> -/**
> - * __i915_active_request_get_rcu - return a reference to the active request
> - * @active - the active tracker
> - *
> - * __i915_active_request_get() returns a reference to the active request,
> - * or NULL if the active tracker is idle. The caller must hold the RCU read
> - * lock, but the returned pointer is safe to use outside of RCU.
> - */
> -static inline struct i915_request *
> -__i915_active_request_get_rcu(const struct i915_active_request *active)
> -{
> -	/*
> -	 * Performing a lockless retrieval of the active request is super
> -	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
> -	 * slab of request objects will not be freed whilst we hold the
> -	 * RCU read lock. It does not guarantee that the request itself
> -	 * will not be freed and then *reused*. Viz,
> -	 *
> -	 * Thread A			Thread B
> -	 *
> -	 * rq = active.request
> -	 *				retire(rq) -> free(rq);
> -	 *				(rq is now first on the slab freelist)
> -	 *				active.request = NULL
> -	 *
> -	 *				rq = new submission on a new object
> -	 * ref(rq)
> -	 *
> -	 * To prevent the request from being reused whilst the caller
> -	 * uses it, we take a reference like normal. Whilst acquiring
> -	 * the reference we check that it is not in a destroyed state
> -	 * (refcnt == 0). That prevents the request being reallocated
> -	 * whilst the caller holds on to it. To check that the request
> -	 * was not reallocated as we acquired the reference we have to
> -	 * check that our request remains the active request across
> -	 * the lookup, in the same manner as a seqlock. The visibility
> -	 * of the pointer versus the reference counting is controlled
> -	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
> -	 *
> -	 * In the middle of all that, we inspect whether the request is
> -	 * complete. Retiring is lazy so the request may be completed long
> -	 * before the active tracker is updated. Querying whether the
> -	 * request is complete is far cheaper (as it involves no locked
> -	 * instructions setting cachelines to exclusive) than acquiring
> -	 * the reference, so we do it first. The RCU read lock ensures the
> -	 * pointer dereference is valid, but does not ensure that the
> -	 * seqno nor HWS is the right one! However, if the request was
> -	 * reallocated, that means the active tracker's request was complete.
> -	 * If the new request is also complete, then both are and we can
> -	 * just report the active tracker is idle. If the new request is
> -	 * incomplete, then we acquire a reference on it and check that
> -	 * it remained the active request.
> -	 *
> -	 * It is then imperative that we do not zero the request on
> -	 * reallocation, so that we can chase the dangling pointers!
> -	 * See i915_request_alloc().
> -	 */
> -	do {
> -		struct i915_request *request;
> -
> -		request = rcu_dereference(active->request);
> -		if (!request || i915_request_completed(request))
> -			return NULL;
> -
> -		/*
> -		 * An especially silly compiler could decide to recompute the
> -		 * result of i915_request_completed, more specifically
> -		 * re-emit the load for request->fence.seqno. A race would catch
> -		 * a later seqno value, which could flip the result from true to
> -		 * false. Which means part of the instructions below might not
> -		 * be executed, while later on instructions are executed. Due to
> -		 * barriers within the refcounting the inconsistency can't reach
> -		 * past the call to i915_request_get_rcu, but not executing
> -		 * that while still executing i915_request_put() creates
> -		 * havoc enough.  Prevent this with a compiler barrier.
> -		 */
> -		barrier();
> -
> -		request = i915_request_get_rcu(request);
> -
> -		/*
> -		 * What stops the following rcu_access_pointer() from occurring
> -		 * before the above i915_request_get_rcu()? If we were
> -		 * to read the value before pausing to get the reference to
> -		 * the request, we may not notice a change in the active
> -		 * tracker.
> -		 *
> -		 * The rcu_access_pointer() is a mere compiler barrier, which
> -		 * means both the CPU and compiler are free to perform the
> -		 * memory read without constraint. The compiler only has to
> -		 * ensure that any operations after the rcu_access_pointer()
> -		 * occur afterwards in program order. This means the read may
> -		 * be performed earlier by an out-of-order CPU, or adventurous
> -		 * compiler.
> -		 *
> -		 * The atomic operation at the heart of
> -		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
> -		 * atomic_inc_not_zero() which is only a full memory barrier
> -		 * when successful. That is, if i915_request_get_rcu()
> -		 * returns the request (and so with the reference counted
> -		 * incremented) then the following read for rcu_access_pointer()
> -		 * must occur after the atomic operation and so confirm
> -		 * that this request is the one currently being tracked.
> -		 *
> -		 * The corresponding write barrier is part of
> -		 * rcu_assign_pointer().
> -		 */
> -		if (!request || request == rcu_access_pointer(active->request))
> -			return rcu_pointer_handoff(request);
> -
> -		i915_request_put(request);
> -	} while (1);
> -}
> -
> +int __must_check
> +i915_active_fence_set(struct i915_active_fence *active,
> +		      struct i915_request *rq);
>   /**
> - * i915_active_request_get_unlocked - return a reference to the active request
> + * i915_active_fence_get - return a reference to the active fence
>    * @active - the active tracker
>    *
> - * i915_active_request_get_unlocked() returns a reference to the active request,
> + * i915_active_fence_get() returns a reference to the active fence,
>    * or NULL if the active tracker is idle. The reference is obtained under RCU,
>    * so no locking is required by the caller.
>    *
> - * The reference should be freed with i915_request_put().
> + * The reference should be freed with dma_fence_put().
>    */
> -static inline struct i915_request *
> -i915_active_request_get_unlocked(const struct i915_active_request *active)
> +static inline struct dma_fence *
> +i915_active_fence_get(struct i915_active_fence *active)
>   {
> -	struct i915_request *request;
> +	struct dma_fence *fence;
>   
>   	rcu_read_lock();
> -	request = __i915_active_request_get_rcu(active);
> +	fence = dma_fence_get_rcu_safe(&active->fence);
>   	rcu_read_unlock();
>   
> -	return request;
> +	return fence;
>   }
>   
>   /**
> - * i915_active_request_isset - report whether the active tracker is assigned
> + * i915_active_fence_isset - report whether the active tracker is assigned
>    * @active - the active tracker
>    *
> - * i915_active_request_isset() returns true if the active tracker is currently
> - * assigned to a request. Due to the lazy retiring, that request may be idle
> + * i915_active_fence_isset() returns true if the active tracker is currently
> + * assigned to a fence. Due to the lazy retiring, that fence may be idle
>    * and this may report stale information.
>    */
>   static inline bool
> -i915_active_request_isset(const struct i915_active_request *active)
> +i915_active_fence_isset(const struct i915_active_fence *active)
>   {
> -	return rcu_access_pointer(active->request);
> +	return rcu_access_pointer(active->fence);
>   }
>   
> -/**
> - * i915_active_request_retire - waits until the request is retired
> - * @active - the active request on which to wait
> - *
> - * i915_active_request_retire() waits until the request is completed,
> - * and then ensures that at least the retirement handler for this
> - * @active tracker is called before returning. If the @active
> - * tracker is idle, the function returns immediately.
> - */
> -static inline int __must_check
> -i915_active_request_retire(struct i915_active_request *active,
> -			   struct mutex *mutex)
> +static inline void
> +i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
>   {
> -	struct i915_request *request;
> -	long ret;
> -
> -	request = i915_active_request_raw(active, mutex);
> -	if (!request)
> -		return 0;
> -
> -	ret = i915_request_wait(request,
> -				I915_WAIT_INTERRUPTIBLE,
> -				MAX_SCHEDULE_TIMEOUT);
> -	if (ret < 0)
> -		return ret;
> +	struct i915_active_fence *active =
> +		container_of(cb, typeof(*active), cb);
>   
> -	list_del_init(&active->link);
> -	RCU_INIT_POINTER(active->request, NULL);
> -
> -	active->retire(active, request);
> -
> -	return 0;
> +	RCU_INIT_POINTER(active->fence, NULL);
>   }
>   
>   /*
> @@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active,
>    * synchronisation.
>    */
>   
> -void __i915_active_init(struct drm_i915_private *i915,
> -			struct i915_active *ref,
> +void __i915_active_init(struct i915_active *ref,
>   			int (*active)(struct i915_active *ref),
>   			void (*retire)(struct i915_active *ref),
>   			struct lock_class_key *key);
> -#define i915_active_init(i915, ref, active, retire) do {		\
> +#define i915_active_init(ref, active, retire) do {		\
>   	static struct lock_class_key __key;				\
>   									\
> -	__i915_active_init(i915, ref, active, retire, &__key);		\
> +	__i915_active_init(ref, active, retire, &__key);		\
>   } while (0)
>   
>   int i915_active_ref(struct i915_active *ref,
>   		    struct intel_timeline *tl,
> -		    struct i915_request *rq);
> +		    struct dma_fence *fence);
>   
>   static inline int
>   i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
>   {
> -	return i915_active_ref(ref, i915_request_timeline(rq), rq);
> +	return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
>   }
>   
>   void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
>   
>   static inline bool i915_active_has_exclusive(struct i915_active *ref)
>   {
> -	return rcu_access_pointer(ref->excl);
> +	return rcu_access_pointer(ref->excl.fence);
>   }
>   
>   int i915_active_wait(struct i915_active *ref);
>   
> -int i915_request_await_active(struct i915_request *rq,
> -			      struct i915_active *ref);
> -int i915_request_await_active_request(struct i915_request *rq,
> -				      struct i915_active_request *active);
> +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
>   
>   int i915_active_acquire(struct i915_active *ref);
> +bool i915_active_acquire_if_busy(struct i915_active *ref);
>   void i915_active_release(struct i915_active *ref);
> -void __i915_active_release_nested(struct i915_active *ref, int subclass);
> -
> -bool i915_active_trygrab(struct i915_active *ref);
> -void i915_active_ungrab(struct i915_active *ref);
>   
>   static inline bool
>   i915_active_is_idle(const struct i915_active *ref)
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index 021167f0004d..d89a74c142c6 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -17,17 +17,9 @@
>   
>   #include "i915_utils.h"
>   
> -struct drm_i915_private;
> -struct i915_active_request;
> -struct i915_request;
> -
> -typedef void (*i915_active_retire_fn)(struct i915_active_request *,
> -				      struct i915_request *);
> -
> -struct i915_active_request {
> -	struct i915_request __rcu *request;
> -	struct list_head link;
> -	i915_active_retire_fn retire;
> +struct i915_active_fence {
> +	struct dma_fence __rcu *fence;
> +	struct dma_fence_cb cb;
>   #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>   	/*
>   	 * Incorporeal!
> @@ -53,20 +45,17 @@ struct active_node;
>   #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2)
>   
>   struct i915_active {
> -	struct drm_i915_private *i915;
> +	atomic_t count;
> +	struct mutex mutex;
>   
>   	struct active_node *cache;
>   	struct rb_root tree;
> -	struct mutex mutex;
> -	atomic_t count;
>   
>   	/* Preallocated "exclusive" node */
> -	struct dma_fence __rcu *excl;
> -	struct dma_fence_cb excl_cb;
> +	struct i915_active_fence excl;
>   
>   	unsigned long flags;
>   #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
> -#define I915_ACTIVE_GRAB_BIT 1
>   
>   	int (*active)(struct i915_active *ref);
>   	void (*retire)(struct i915_active *ref);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f50058cf8ab8..64890627d638 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout)
>   
>   	spin_lock_irqsave(&timelines->lock, flags);
>   	list_for_each_entry(tl, &timelines->active_list, link) {
> -		struct i915_request *rq;
> +		struct dma_fence *fence;
>   
> -		rq = i915_active_request_get_unlocked(&tl->last_request);
> -		if (!rq)
> +		fence = i915_active_fence_get(&tl->last_request);
> +		if (!fence)
>   			continue;
>   
>   		spin_unlock_irqrestore(&timelines->lock, flags);
>   
> -		/*
> -		 * "Race-to-idle".
> -		 *
> -		 * Switching to the kernel context is often used a synchronous
> -		 * step prior to idling, e.g. in suspend for flushing all
> -		 * current operations to memory before sleeping. These we
> -		 * want to complete as quickly as possible to avoid prolonged
> -		 * stalls, so allow the gpu to boost to maximum clocks.
> -		 */
> -		if (wait & I915_WAIT_FOR_IDLE_BOOST)
> -			gen6_rps_boost(rq);
> +		if (!dma_fence_is_i915(fence)) {
> +			timeout = dma_fence_wait_timeout(fence,
> +							 flags & I915_WAIT_INTERRUPTIBLE,
> +							 timeout);
> +		} else {
> +			struct i915_request *rq = to_request(fence);
> +
> +			/*
> +			 * "Race-to-idle".
> +			 *
> +			 * Switching to the kernel context is often used as
> +			 * a synchronous step prior to idling, e.g. in suspend
> +			 * for flushing all current operations to memory before
> +			 * sleeping. These we want to complete as quickly as
> +			 * possible to avoid prolonged stalls, so allow the gpu
> +			 * to boost to maximum clocks.
> +			 */
> +			if (flags & I915_WAIT_FOR_IDLE_BOOST)
> +				gen6_rps_boost(rq);
> +
> +			timeout = i915_request_wait(rq, flags, timeout);
> +		}
>   
> -		timeout = i915_request_wait(rq, wait, timeout);
> -		i915_request_put(rq);
> +		dma_fence_put(fence);
>   		if (timeout < 0)
>   			return timeout;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 55cebf256d03..7462d87f7a48 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = {
>   
>   static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
>   {
> -	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
>   	struct i915_vma *vma;
>   
> @@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	i915_active_init(i915, &vma->active, NULL, NULL);
> +	i915_active_init(&vma->active, NULL, NULL);
>   
>   	mutex_init(&vma->pages_mutex);
>   	vma->vm = i915_vm_get(&ggtt->vm);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6384a06aa5bf..a28ee754b7b4 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next,
>   	if (!c)
>   		return next;
>   
> -	if (!i915_active_trygrab(&vma->active)) {
> +	if (!i915_active_acquire_if_busy(&vma->active)) {
>   		kfree(c);
>   		return next;
>   	}
> @@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
>   			*this->slot =
>   				i915_error_object_create(i915, vma, compress);
>   
> -			i915_active_ungrab(&vma->active);
> +			i915_active_release(&vma->active);
>   			i915_vma_put(vma);
>   
>   			capture = this->next;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a8916412759b..4ffe62a42186 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq)
>   
>   static bool i915_request_retire(struct i915_request *rq)
>   {
> -	struct i915_active_request *active, *next;
> -
>   	if (!i915_request_completed(rq))
>   		return false;
>   
> @@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq)
>   				  &i915_request_timeline(rq)->requests));
>   	rq->ring->head = rq->postfix;
>   
> -	/*
> -	 * Walk through the active list, calling retire on each. This allows
> -	 * objects to track their GPU activity and mark themselves as idle
> -	 * when their *last* active request is completed (updating state
> -	 * tracking lists for eviction, active references for GEM, etc).
> -	 *
> -	 * As the ->retire() may free the node, we decouple it first and
> -	 * pass along the auxiliary information (to avoid dereferencing
> -	 * the node after the callback).
> -	 */
> -	list_for_each_entry_safe(active, next, &rq->active_list, link) {
> -		/*
> -		 * In microbenchmarks or focusing upon time inside the kernel,
> -		 * we may spend an inordinate amount of time simply handling
> -		 * the retirement of requests and processing their callbacks.
> -		 * Of which, this loop itself is particularly hot due to the
> -		 * cache misses when jumping around the list of
> -		 * i915_active_request.  So we try to keep this loop as
> -		 * streamlined as possible and also prefetch the next
> -		 * i915_active_request to try and hide the likely cache miss.
> -		 */
> -		prefetchw(next);
> -
> -		INIT_LIST_HEAD(&active->link);
> -		RCU_INIT_POINTER(active->request, NULL);
> -
> -		active->retire(active, rq);
> -	}
> -
>   	local_irq_disable();
>   
>   	/*
> @@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	rq->flags = 0;
>   	rq->execution_mask = ALL_ENGINES;
>   
> -	INIT_LIST_HEAD(&rq->active_list);
>   	INIT_LIST_HEAD(&rq->execute_cb);
>   
>   	/*
> @@ -743,7 +711,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   	ce->ring->emit = rq->head;
>   
>   	/* Make sure we didn't add ourselves to external state before freeing */
> -	GEM_BUG_ON(!list_empty(&rq->active_list));
>   	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
>   	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
>   
> @@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>   	 * precludes optimising to use semaphores serialisation of a single
>   	 * timeline across engines.
>   	 */
> -	prev = rcu_dereference_protected(timeline->last_request.request,
> -					 lockdep_is_held(&timeline->mutex));
> +	prev = to_request(__i915_active_fence_set(&timeline->last_request,
> +						  &rq->fence));
>   	if (prev && !i915_request_completed(prev)) {
>   		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
>   			i915_sw_fence_await_sw_fence(&rq->submit,
> @@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>   	 * us, the timeline will hold its seqno which is later than ours.
>   	 */
>   	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
> -	__i915_active_request_set(&timeline->last_request, rq);
>   
>   	return prev;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index ec5bb4c2e5ae..91a885c36c6b 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -211,7 +211,6 @@ struct i915_request {
>   	 * on the active_list (of their final request).
>   	 */
>   	struct i915_capture_list *capture_list;
> -	struct list_head active_list;
>   
>   	/** Time at which this request was emitted, in jiffies. */
>   	unsigned long emitted_jiffies;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index e191247c7c5f..9fdcd4e2c799 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   	vma->size = obj->base.size;
>   	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
>   
> -	i915_active_init(vm->i915, &vma->active,
> -			 __i915_vma_active, __i915_vma_retire);
> +	i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire);
>   
>   	/* Declare ourselves safe for use inside shrinkers */
>   	if (IS_ENABLED(CONFIG_LOCKDEP)) {
> @@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
>   	if (ret)
>   		return ret;
>   
> +	GEM_BUG_ON(i915_vma_is_active(vma));
>   	if (i915_vma_is_pinned(vma)) {
>   		vma_print_allocator(vma, "is pinned");
>   		return -EBUSY;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> index a41785822ed9..2cc71bcf884f 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_active.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915)
>   		return NULL;
>   
>   	kref_init(&active->ref);
> -	i915_active_init(i915, &active->base, __live_active, __live_retire);
> +	i915_active_init(&active->base, __live_active, __live_retire);
>   
>   	return active;
>   }
> @@ -146,19 +146,13 @@ static int live_active_wait(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
>   	struct live_active *active;
> -	intel_wakeref_t wakeref;
>   	int err = 0;
>   
>   	/* Check that we get a callback when requests retire upon waiting */
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> -
>   	active = __live_active_setup(i915);
> -	if (IS_ERR(active)) {
> -		err = PTR_ERR(active);
> -		goto err;
> -	}
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
>   
>   	i915_active_wait(&active->base);
>   	if (!READ_ONCE(active->retired)) {
> @@ -168,11 +162,9 @@ static int live_active_wait(void *arg)
>   
>   	__live_put(active);
>   
> +	mutex_lock(&i915->drm.struct_mutex);
>   	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>   		err = -EIO;
> -
> -err:
> -	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	return err;
> @@ -182,23 +174,19 @@ static int live_active_retire(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
>   	struct live_active *active;
> -	intel_wakeref_t wakeref;
>   	int err = 0;
>   
>   	/* Check that we get a callback when requests are indirectly retired */
>   
> -	mutex_lock(&i915->drm.struct_mutex);
> -	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> -
>   	active = __live_active_setup(i915);
> -	if (IS_ERR(active)) {
> -		err = PTR_ERR(active);
> -		goto err;
> -	}
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
>   
>   	/* waits for & retires all requests */
> +	mutex_lock(&i915->drm.struct_mutex);
>   	if (igt_flush_test(i915, I915_WAIT_LOCKED))
>   		err = -EIO;
> +	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	if (!READ_ONCE(active->retired)) {
>   		pr_err("i915_active not retired after flushing!\n");
> @@ -207,10 +195,6 @@ static int live_active_retire(void *arg)
>   
>   	__live_put(active);
>   
> -err:
> -	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> -	mutex_unlock(&i915->drm.struct_mutex);
> -
>   	return err;
>   }
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 50+ messages in thread
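
A note on the pattern at the heart of the conversion above: i915_active_fence
reduces the old hand-rolled RCU dance to an RCU-protected fence pointer plus a
dma_fence callback (i915_active_fence_cb) that clears the pointer once the
fence signals. Below is a minimal, hedged sketch of the consumer side, using
only the dma-fence API visible in the patch; flush_tracker() is an
illustrative name, not a driver function, and the fragment assumes the i915
headers quoted above:

	/* Sketch: safely acquire an RCU-tracked fence and wait on it. */
	static long flush_tracker(struct i915_active_fence *active, long timeout)
	{
		struct dma_fence *fence;

		/*
		 * dma_fence_get_rcu_safe() re-checks the pointer after
		 * taking the reference, so a concurrent reassignment is
		 * never mistaken for the fence we meant to wait on. This
		 * replaces the long barrier commentary deleted above.
		 */
		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&active->fence);
		rcu_read_unlock();
		if (!fence)
			return timeout; /* tracker idle: nothing to wait for */

		timeout = dma_fence_wait_timeout(fence, true, timeout);
		dma_fence_put(fence);
		return timeout;
	}

This is the shape of the wait_for_timelines() hunk above, minus its
race-to-idle boost special case for native i915 requests.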

* Re: [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking
  2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
@ 2019-10-02 15:53   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Matthew Auld


On 02/10/2019 12:19, Chris Wilson wrote:
> Now that we can retire without taking struct_mutex, we can do so to
> handle shrinking the mmap-offset space after an allocation failure.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++------------
>   1 file changed, 5 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 418d0d2b5fa9..45bbd22c14f1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -431,19 +431,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
>   		return 0;
>   
>   	/* Attempt to reap some mmap space from dead objects */
> -	do {
> -		err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> -		if (err)
> -			break;
> +	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
> +	if (err)
> +		return err;
>   
> -		i915_gem_drain_freed_objects(i915);
> -		err = drm_gem_create_mmap_offset(&obj->base);
> -		if (!err)
> -			break;
> -
> -	} while (flush_delayed_work(&i915->gem.retire_work));
> -
> -	return err;
> +	i915_gem_drain_freed_objects(i915);
> +	return drm_gem_create_mmap_offset(&obj->base);
>   }
>   
>   int
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 50+ messages in thread
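
For reference, the patched function now reads as a single retire-and-retry
pass. A hedged paraphrase of the resulting create_mmap_offset(), condensed
from the diff above rather than copied from the tree:

	static int create_mmap_offset(struct drm_i915_gem_object *obj)
	{
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
		int err;

		err = drm_gem_create_mmap_offset(&obj->base);
		if (!err)
			return 0;

		/* Retire every completed request, without struct_mutex... */
		err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
		if (err)
			return err;

		/* ...reap the objects they pinned, then retry once. */
		i915_gem_drain_freed_objects(i915);
		return drm_gem_create_mmap_offset(&obj->base);
	}

The old loop around flush_delayed_work() is gone because retiring no longer
depends on the background worker making forward progress.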

* Re: [PATCH 14/30] drm/i915: Move request runtime management onto gt
  2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
@ 2019-10-02 15:58   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 15:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> Requests are run from the gt and are tied into the gt runtime power
> management, so pull the runtime request management under gt/.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   4 +-
>   drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  28 +---
>   .../drm/i915/gem/selftests/i915_gem_context.c |   5 +-
>   .../drm/i915/gem/selftests/i915_gem_mman.c    |   2 +-
>   drivers/gpu/drm/i915/gt/intel_gt.c            |   2 +
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c         |   5 +-
>   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 123 ++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_requests.h   |  24 ++++
>   drivers/gpu/drm/i915/gt/intel_gt_types.h      |  11 ++
>   drivers/gpu/drm/i915/gt/selftest_timeline.c   |   8 +-
>   drivers/gpu/drm/i915/i915_debugfs.c           |  17 +--
>   drivers/gpu/drm/i915/i915_drv.h               |  10 --
>   drivers/gpu/drm/i915/i915_gem.c               |  17 ---
>   drivers/gpu/drm/i915/i915_gem_evict.c         |  14 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
>   drivers/gpu/drm/i915/i915_request.c           |  64 +--------
>   drivers/gpu/drm/i915/i915_request.h           |   7 +-
>   .../gpu/drm/i915/selftests/igt_flush_test.c   |   9 +-
>   .../gpu/drm/i915/selftests/igt_live_test.c    |   5 +-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  10 +-
>   21 files changed, 213 insertions(+), 158 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c
>   create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index d2b53b5add81..06e1876d0250 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -83,6 +83,7 @@ gt-y += \
>   	gt/intel_gt_irq.o \
>   	gt/intel_gt_pm.o \
>   	gt/intel_gt_pm_irq.o \
> +	gt/intel_gt_requests.o \
>   	gt/intel_hangcheck.o \
>   	gt/intel_lrc.o \
>   	gt/intel_rc6.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index 45bbd22c14f1..fd4122d8c0a9 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -8,6 +8,7 @@
>   #include <linux/sizes.h>
>   
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_gem_gtt.h"
> @@ -424,6 +425,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
>   static int create_mmap_offset(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	struct intel_gt *gt = &i915->gt;
>   	int err;
>   
>   	err = drm_gem_create_mmap_offset(&obj->base);
> @@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj)
>   		return 0;
>   
>   	/* Attempt to reap some mmap space from dead objects */
> -	err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT);
> +	err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT);
>   	if (err)
>   		return err;
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 90b211257f2d..9194d8464bf7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -7,31 +7,18 @@
>   #include "gem/i915_gem_pm.h"
>   #include "gt/intel_gt.h"
>   #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_globals.h"
>   
>   static void i915_gem_park(struct drm_i915_private *i915)
>   {
> -	cancel_delayed_work(&i915->gem.retire_work);
> -
>   	i915_vma_parked(i915);
>   
>   	i915_globals_park();
>   }
>   
> -static void retire_work_handler(struct work_struct *work)
> -{
> -	struct drm_i915_private *i915 =
> -		container_of(work, typeof(*i915), gem.retire_work.work);
> -
> -	i915_retire_requests(i915);
> -
> -	queue_delayed_work(i915->wq,
> -			   &i915->gem.retire_work,
> -			   round_jiffies_up_relative(HZ));
> -}
> -
>   static int pm_notifier(struct notifier_block *nb,
>   		       unsigned long action,
>   		       void *data)
> @@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb,
>   	switch (action) {
>   	case INTEL_GT_UNPARK:
>   		i915_globals_unpark();
> -		queue_delayed_work(i915->wq,
> -				   &i915->gem.retire_work,
> -				   round_jiffies_up_relative(HZ));
>   		break;
>   
>   	case INTEL_GT_PARK:
> @@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt)
>   {
>   	bool result = !intel_gt_is_wedged(gt);
>   
> -	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> +	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
>   		/* XXX hide warning from gem_eio */
>   		if (i915_modparams.reset) {
>   			dev_err(gt->i915->drm.dev,
> @@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 * state. Fortunately, the kernel_context is disposable and we do
>   	 * not rely on its state.
>   	 */
> -	switch_to_kernel_context_sync(&i915->gt);
> +	intel_gt_suspend(&i915->gt);
> +	intel_uc_suspend(&i915->gt.uc);
>   
>   	cancel_delayed_work_sync(&i915->gt.hangcheck.work);
>   
>   	i915_gem_drain_freed_objects(i915);
> -
> -	intel_uc_suspend(&i915->gt.uc);
> -	intel_gt_suspend(&i915->gt);
>   }
>   
>   static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
> @@ -239,8 +221,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   
>   void i915_gem_init__pm(struct drm_i915_private *i915)
>   {
> -	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
> -
>   	i915->gem.pm_notifier.notifier_call = pm_notifier;
>   	blocking_notifier_chain_register(&i915->gt.pm_notifications,
>   					 &i915->gem.pm_notifier);
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index f902aeee1755..2288757808ae 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -8,6 +8,7 @@
>   
>   #include "gem/i915_gem_pm.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/intel_reset.h"
>   #include "i915_selftest.h"
>   
> @@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm,
>   	int err;
>   
>   	/* Keep in GEM's good graces */
> -	i915_retire_requests(vm->i915);
> +	intel_gt_retire_requests(vm->gt);
>   
>   	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
>   	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
> @@ -1136,7 +1137,7 @@ __sseu_finish(const char *name,
>   		igt_spinner_end(spin);
>   
>   	if ((flags & TEST_IDLE) && ret == 0) {
> -		ret = i915_gem_wait_for_idle(ce->engine->i915,
> +		ret = intel_gt_wait_for_idle(ce->engine->gt,
>   					     MAX_SCHEDULE_TIMEOUT);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index 4ba6ed5c8313..1cd25cfd0246 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -573,7 +573,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>   {
>   	i915_gem_driver_unregister__shrinker(i915);
>   	intel_gt_pm_get(&i915->gt);
> -	cancel_delayed_work_sync(&i915->gem.retire_work);
> +	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
>   }
>   
>   static void restore_retire_worker(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 7205595369be..53220741e49e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -6,6 +6,7 @@
>   #include "i915_drv.h"
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
>   #include "intel_mocs.h"
>   #include "intel_rc6.h"
>   #include "intel_uncore.h"
> @@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
>   
>   	intel_gt_init_hangcheck(gt);
>   	intel_gt_init_reset(gt);
> +	intel_gt_init_requests(gt);
>   	intel_gt_pm_init_early(gt);
>   	intel_uc_init_early(&gt->uc);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index bdb34f03ec47..d2e80ba64d69 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -10,6 +10,7 @@
>   #include "intel_engine_pm.h"
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
>   #include "intel_pm.h"
>   #include "intel_rc6.h"
>   #include "intel_wakeref.h"
> @@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
>   	i915_pmu_gt_unparked(i915);
>   
>   	intel_gt_queue_hangcheck(gt);
> +	intel_gt_unpark_requests(gt);
>   
>   	pm_notify(gt, INTEL_GT_UNPARK);
>   
> @@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf)
>   	GEM_TRACE("\n");
>   
>   	pm_notify(gt, INTEL_GT_PARK);
> +	intel_gt_park_requests(gt);
>   
>   	i915_pmu_gt_parked(i915);
>   	if (INTEL_GEN(i915) >= 6)
> @@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt)
>   
>   static void wait_for_idle(struct intel_gt *gt)
>   {
> -	if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
> +	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
>   		/*
>   		 * Forcibly cancel outstanding work and leave
>   		 * the gpu quiet.
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> new file mode 100644
> index 000000000000..8aed89fd2cdc
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> @@ -0,0 +1,123 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include "i915_request.h"
> +#include "intel_gt.h"
> +#include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
> +#include "intel_timeline.h"
> +
> +static void retire_requests(struct intel_timeline *tl)
> +{
> +	struct i915_request *rq, *rn;
> +
> +	list_for_each_entry_safe(rq, rn, &tl->requests, link)
> +		if (!i915_request_retire(rq))
> +			break;
> +}
> +
> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
> +{
> +	struct intel_gt_timelines *timelines = &gt->timelines;
> +	struct intel_timeline *tl, *tn;
> +	unsigned long active_count = 0;
> +	unsigned long flags;
> +	bool interruptible;
> +	LIST_HEAD(free);
> +
> +	interruptible = true;
> +	if (unlikely(timeout < 0))
> +		timeout = -timeout, interruptible = false;
> +
> +	spin_lock_irqsave(&timelines->lock, flags);
> +	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
> +		if (!mutex_trylock(&tl->mutex))
> +			continue;
> +
> +		intel_timeline_get(tl);
> +		GEM_BUG_ON(!tl->active_count);
> +		tl->active_count++; /* pin the list element */
> +		spin_unlock_irqrestore(&timelines->lock, flags);
> +
> +		if (timeout > 0) {
> +			struct dma_fence *fence;
> +
> +			fence = i915_active_fence_get(&tl->last_request);
> +			if (fence) {
> +				timeout = dma_fence_wait_timeout(fence,
> +								 true,
> +								 timeout);
> +				dma_fence_put(fence);
> +			}
> +		}
> +
> +		retire_requests(tl);
> +
> +		spin_lock_irqsave(&timelines->lock, flags);
> +
> +		/* Resume iteration after dropping lock */
> +		list_safe_reset_next(tl, tn, link);
> +		if (--tl->active_count)
> +			active_count += !!rcu_access_pointer(tl->last_request.fence);
> +		else
> +			list_del(&tl->link);
> +
> +		mutex_unlock(&tl->mutex);
> +
> +		/* Defer the final release to after the spinlock */
> +		if (refcount_dec_and_test(&tl->kref.refcount)) {
> +			GEM_BUG_ON(tl->active_count);
> +			list_add(&tl->link, &free);
> +		}
> +	}
> +	spin_unlock_irqrestore(&timelines->lock, flags);
> +
> +	list_for_each_entry_safe(tl, tn, &free, link)
> +		__intel_timeline_free(&tl->kref);
> +
> +	return active_count ? timeout : 0;
> +}
> +
> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
> +{
> +	/* If the device is asleep, we have no requests outstanding */
> +	if (!intel_gt_pm_is_awake(gt))
> +		return 0;
> +
> +	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
> +		cond_resched();
> +		if (signal_pending(current))
> +			return -EINTR;
> +	}
> +
> +	return timeout;
> +}
> +
> +static void retire_work_handler(struct work_struct *work)
> +{
> +	struct intel_gt *gt =
> +		container_of(work, typeof(*gt), requests.retire_work.work);
> +
> +	intel_gt_retire_requests(gt);
> +	schedule_delayed_work(&gt->requests.retire_work,
> +			      round_jiffies_up_relative(HZ));
> +}
> +
> +void intel_gt_init_requests(struct intel_gt *gt)
> +{
> +	INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
> +}
> +
> +void intel_gt_park_requests(struct intel_gt *gt)
> +{
> +	cancel_delayed_work(&gt->requests.retire_work);
> +}
> +
> +void intel_gt_unpark_requests(struct intel_gt *gt)
> +{
> +	schedule_delayed_work(&gt->requests.retire_work,
> +			      round_jiffies_up_relative(HZ));
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> new file mode 100644
> index 000000000000..bd31cbce47e0
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> @@ -0,0 +1,24 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_GT_REQUESTS_H
> +#define INTEL_GT_REQUESTS_H
> +
> +struct intel_gt;
> +
> +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
> +static inline void intel_gt_retire_requests(struct intel_gt *gt)
> +{
> +	intel_gt_retire_requests_timeout(gt, 0);
> +}
> +
> +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
> +
> +void intel_gt_init_requests(struct intel_gt *gt);
> +void intel_gt_park_requests(struct intel_gt *gt);
> +void intel_gt_unpark_requests(struct intel_gt *gt);
> +
> +#endif /* INTEL_GT_REQUESTS_H */
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index 7134f1319bbe..802f516a3430 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -50,6 +50,17 @@ struct intel_gt {
>   		struct list_head hwsp_free_list;
>   	} timelines;
>   
> +	struct intel_gt_requests {
> +		/**
> +		 * We leave the user IRQ off as much as possible,
> +		 * but this means that requests will finish and never
> +		 * be retired once the system goes idle. Set a timer to
> +		 * fire periodically while the ring is running. When it
> +		 * fires, go retire requests.
> +		 */
> +		struct delayed_work retire_work;
> +	} requests;
> +
>   	struct intel_wakeref wakeref;
>   	atomic_t user_wakeref;
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index 16abfabf08c7..d6df40cdc8a6 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -8,6 +8,7 @@
>   
>   #include "intel_engine_pm.h"
>   #include "intel_gt.h"
> +#include "intel_gt_requests.h"
>   
>   #include "../selftests/i915_random.h"
>   #include "../i915_selftest.h"
> @@ -641,6 +642,7 @@ static int live_hwsp_alternate(void *arg)
>   static int live_hwsp_wrap(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> +	struct intel_gt *gt = &i915->gt;
>   	struct intel_engine_cs *engine;
>   	struct intel_timeline *tl;
>   	enum intel_engine_id id;
> @@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg)
>   	 * foreign GPU references.
>   	 */
>   
> -	tl = intel_timeline_create(&i915->gt, NULL);
> +	tl = intel_timeline_create(gt, NULL);
>   	if (IS_ERR(tl))
>   		return PTR_ERR(tl);
>   
> @@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg)
>   	if (err)
>   		goto out_free;
>   
> -	for_each_engine(engine, i915, id) {
> +	for_each_engine(engine, gt->i915, id) {
>   		const u32 *hwsp_seqno[2];
>   		struct i915_request *rq;
>   		u32 seqno[2];
> @@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg)
>   			goto out;
>   		}
>   
> -		i915_retire_requests(i915); /* recycle HWSP */
> +		intel_gt_retire_requests(gt); /* recycle HWSP */
>   	}
>   
>   out:
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5888a658e2b7..2afc41e43b6e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -41,6 +41,7 @@
>   
>   #include "gem/i915_gem_context.h"
>   #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/intel_reset.h"
>   #include "gt/intel_rc6.h"
>   #include "gt/uc/intel_guc_submission.h"
> @@ -3621,33 +3622,33 @@ static int
>   i915_drop_caches_set(void *data, u64 val)
>   {
>   	struct drm_i915_private *i915 = data;
> +	struct intel_gt *gt = &i915->gt;
>   	int ret;
>   
>   	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
>   		  val, val & DROP_ALL);
>   
>   	if (val & DROP_RESET_ACTIVE &&
> -	    wait_for(intel_engines_are_idle(&i915->gt),
> -		     I915_IDLE_ENGINES_TIMEOUT))
> -		intel_gt_set_wedged(&i915->gt);
> +	    wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT))
> +		intel_gt_set_wedged(gt);
>   
>   	if (val & DROP_RETIRE)
> -		i915_retire_requests(i915);
> +		intel_gt_retire_requests(gt);
>   
>   	if (val & (DROP_IDLE | DROP_ACTIVE)) {
> -		ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> +		ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>   		if (ret)
>   			return ret;
>   	}
>   
>   	if (val & DROP_IDLE) {
> -		ret = intel_gt_pm_wait_for_idle(&i915->gt);
> +		ret = intel_gt_pm_wait_for_idle(gt);
>   		if (ret)
>   			return ret;
>   	}
>   
> -	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
> -		intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL);
> +	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt))
> +		intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL);
>   
>   	fs_reclaim_acquire(GFP_KERNEL);
>   	if (val & DROP_BOUND)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 44f3463ff9f1..cb63b2bd0ce8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1710,15 +1710,6 @@ struct drm_i915_private {
>   
>   	struct {
>   		struct notifier_block pm_notifier;
> -
> -		/**
> -		 * We leave the user IRQ off as much as possible,
> -		 * but this means that requests will finish and never
> -		 * be retired once the system goes idle. Set a timer to
> -		 * fire periodically while the ring is running. When it
> -		 * fires, go retire requests.
> -		 */
> -		struct delayed_work retire_work;
>   	} gem;
>   
>   	/* For i945gm vblank irq vs. C3 workaround */
> @@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915);
>   void i915_gem_driver_unregister(struct drm_i915_private *i915);
>   void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
>   void i915_gem_driver_release(struct drm_i915_private *dev_priv);
> -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout);
>   void i915_gem_suspend(struct drm_i915_private *dev_priv);
>   void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
>   void i915_gem_resume(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7c82fc39f655..5a664bdead8c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
>   	}
>   }
>   
> -int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout)
> -{
> -	struct intel_gt *gt = &i915->gt;
> -
> -	/* If the device is asleep, we have no requests outstanding */
> -	if (!intel_gt_pm_is_awake(gt))
> -		return 0;
> -
> -	while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) {
> -		cond_resched();
> -		if (signal_pending(current))
> -			return -EINTR;
> -	}
> -
> -	return timeout;
> -}
> -
>   struct i915_vma *
>   i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>   			 const struct i915_ggtt_view *view,
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 0a412f6d01d7..7e62c310290f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -29,6 +29,7 @@
>   #include <drm/i915_drm.h>
>   
>   #include "gem/i915_gem_context.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> @@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
>   	bool fail_if_busy:1;
>   } igt_evict_ctl;)
>   
> -static int ggtt_flush(struct drm_i915_private *i915)
> +static int ggtt_flush(struct intel_gt *gt)
>   {
>   	/*
>   	 * Not everything in the GGTT is tracked via vma (otherwise we
> @@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915)
>   	 * the hopes that we can then remove contexts and the like only
>   	 * bound by their active reference.
>   	 */
> -	return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> +	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>   }
>   
>   static bool
> @@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   			 u64 start, u64 end,
>   			 unsigned flags)
>   {
> -	struct drm_i915_private *dev_priv = vm->i915;
>   	struct drm_mm_scan scan;
>   	struct list_head eviction_list;
>   	struct i915_vma *vma, *next;
> @@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   				    min_size, alignment, color,
>   				    start, end, mode);
>   
> -	i915_retire_requests(vm->i915);
> +	intel_gt_retire_requests(vm->gt);
>   
>   search_again:
>   	active = NULL;
> @@ -197,7 +197,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
>   	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
>   		return -EBUSY;
>   
> -	ret = ggtt_flush(dev_priv);
> +	ret = ggtt_flush(vm->gt);
>   	if (ret)
>   		return ret;
>   
> @@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
>   	 * a stray pin (preventing eviction) that can only be resolved by
>   	 * retiring.
>   	 */
> -	i915_retire_requests(vm->i915);
> +	intel_gt_retire_requests(vm->gt);
>   
>   	if (i915_vm_has_cache_coloring(vm)) {
>   		/* Expand search to cover neighbouring guard pages (or lack!) */
> @@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
>   	 * switch otherwise is ineffective.
>   	 */
>   	if (i915_is_ggtt(vm)) {
> -		ret = ggtt_flush(vm->i915);
> +		ret = ggtt_flush(vm->gt);
>   		if (ret)
>   			return ret;
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 082fcf9085a6..1d26634ca597 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -38,6 +38,7 @@
>   
>   #include "display/intel_frontbuffer.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_scatterlist.h"
> @@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
>   
>   	if (unlikely(ggtt->do_idle_maps)) {
>   		/* XXX This does not prevent more requests being submitted! */
> -		if (i915_retire_requests_timeout(dev_priv,
> -						 -MAX_SCHEDULE_TIMEOUT)) {
> +		if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
> +						     -MAX_SCHEDULE_TIMEOUT)) {
>   			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
>   			/* Wait a bit, in hopes it avoids the hang */
>   			udelay(10);
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 52f7c4e5b644..437f9fc6282e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq)
>   	spin_unlock(&locked->active.lock);
>   }
>   
> -static bool i915_request_retire(struct i915_request *rq)
> +bool i915_request_retire(struct i915_request *rq)
>   {
>   	if (!i915_request_completed(rq))
>   		return false;
> @@ -1508,68 +1508,6 @@ long i915_request_wait(struct i915_request *rq,
>   	return timeout;
>   }
>   
> -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout)
> -{
> -	struct intel_gt_timelines *timelines = &i915->gt.timelines;
> -	struct intel_timeline *tl, *tn;
> -	unsigned long active_count = 0;
> -	unsigned long flags;
> -	bool interruptible;
> -	LIST_HEAD(free);
> -
> -	interruptible = true;
> -	if (timeout < 0)
> -		timeout = -timeout, interruptible = false;
> -
> -	spin_lock_irqsave(&timelines->lock, flags);
> -	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
> -		if (!mutex_trylock(&tl->mutex))
> -			continue;
> -
> -		intel_timeline_get(tl);
> -		GEM_BUG_ON(!tl->active_count);
> -		tl->active_count++; /* pin the list element */
> -		spin_unlock_irqrestore(&timelines->lock, flags);
> -
> -		if (timeout > 0) {
> -			struct dma_fence *fence;
> -
> -			fence = i915_active_fence_get(&tl->last_request);
> -			if (fence) {
> -				timeout = dma_fence_wait_timeout(fence,
> -								 interruptible,
> -								 timeout);
> -				dma_fence_put(fence);
> -			}
> -		}
> -
> -		retire_requests(tl);
> -
> -		spin_lock_irqsave(&timelines->lock, flags);
> -
> -		/* Resume iteration after dropping lock */
> -		list_safe_reset_next(tl, tn, link);
> -		if (--tl->active_count)
> -			active_count += !!rcu_access_pointer(tl->last_request.fence);
> -		else
> -			list_del(&tl->link);
> -
> -		mutex_unlock(&tl->mutex);
> -
> -		/* Defer the final release to after the spinlock */
> -		if (refcount_dec_and_test(&tl->kref.refcount)) {
> -			GEM_BUG_ON(tl->active_count);
> -			list_add(&tl->link, &free);
> -		}
> -	}
> -	spin_unlock_irqrestore(&timelines->lock, flags);
> -
> -	list_for_each_entry_safe(tl, tn, &free, link)
> -		__intel_timeline_free(&tl->kref);
> -
> -	return active_count ? timeout : 0;
> -}
> -
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/mock_request.c"
>   #include "selftests/i915_request.c"
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 256b0715180f..6a95242b280d 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request);
>   void __i915_request_queue(struct i915_request *rq,
>   			  const struct i915_sched_attr *attr);
>   
> +bool i915_request_retire(struct i915_request *rq);
>   void i915_request_retire_upto(struct i915_request *rq);
>   
>   static inline struct i915_request *
> @@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq)
>   					 lockdep_is_held(&rq->engine->active.lock));
>   }
>   
> -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout);
> -static inline void i915_retire_requests(struct drm_i915_private *i915)
> -{
> -	i915_retire_requests_timeout(i915, 0);
> -}
> -
>   #endif /* I915_REQUEST_H */
> diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> index ed496bd6d84f..7b0939e3f007 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
> @@ -4,8 +4,8 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> -#include "gem/i915_gem_context.h"
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "i915_drv.h"
>   #include "i915_selftest.h"
> @@ -14,11 +14,12 @@
>   
>   int igt_flush_test(struct drm_i915_private *i915)
>   {
> -	int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0;
> +	struct intel_gt *gt = &i915->gt;
> +	int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
>   
>   	cond_resched();
>   
> -	if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) {
> +	if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
>   		pr_err("%pS timed out, cancelling all further testing.\n",
>   		       __builtin_return_address(0));
>   
> @@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915)
>   			  __builtin_return_address(0));
>   		GEM_TRACE_DUMP();
>   
> -		intel_gt_set_wedged(&i915->gt);
> +		intel_gt_set_wedged(gt);
>   		ret = -EIO;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
> index eae90f97df6c..810b60100c2c 100644
> --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c
> +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
> @@ -4,7 +4,8 @@
>    * Copyright © 2018 Intel Corporation
>    */
>   
> -#include "../i915_drv.h"
> +#include "i915_drv.h"
> +#include "gt/intel_gt_requests.h"
>   
>   #include "../i915_selftest.h"
>   #include "igt_flush_test.h"
> @@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t,
>   	t->func = func;
>   	t->name = name;
>   
> -	err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT);
> +	err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT);
>   	if (err) {
>   		pr_err("%s(%s): failed to idle before, with err=%d!",
>   		       func, name, err);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 3b589bbb2c2d..4e6cde0d4859 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -26,6 +26,7 @@
>   #include <linux/pm_runtime.h>
>   
>   #include "gt/intel_gt.h"
> +#include "gt/intel_gt_requests.h"
>   #include "gt/mock_engine.h"
>   
>   #include "mock_request.h"
> @@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915)
>   	do {
>   		for_each_engine(engine, i915, id)
>   			mock_engine_flush(engine);
> -	} while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT));
> +	} while (intel_gt_retire_requests_timeout(&i915->gt,
> +						  MAX_SCHEDULE_TIMEOUT));
>   }
>   
>   static void mock_device_release(struct drm_device *dev)
> @@ -98,10 +100,6 @@ static void release_dev(struct device *dev)
>   	kfree(pdev);
>   }
>   
> -static void mock_retire_work_handler(struct work_struct *work)
> -{
> -}
> -
>   static int pm_domain_resume(struct device *dev)
>   {
>   	return pm_generic_runtime_resume(dev);
> @@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void)
>   
>   	mock_init_contexts(i915);
>   
> -	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
> -
>   	intel_timelines_init(i915);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 50+ messages in thread
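
Two implementation details in the new gt/intel_gt_requests.c are worth
calling out. First, intel_gt_retire_requests_timeout() encodes
interruptibility in the sign of the timeout: a negative value requests an
uninterruptible wait of the corresponding magnitude, which is how
i915_gem_gtt_finish_pages() passes -MAX_SCHEDULE_TIMEOUT. Note, though, that
the moved copy passes true to dma_fence_wait_timeout() where the deleted
original passed interruptible, so the uninterruptible mode looks computed but
no longer honoured; presumably unintended. Second, the retire worker is the
classic self-arming delayed work: the handler does one pass and re-queues
itself, while park and unpark cancel and re-schedule the tick. A stripped-down
sketch of that lifecycle, with generic names rather than the driver's:

	#include <linux/workqueue.h>
	#include <linux/timer.h>	/* round_jiffies_up_relative() */

	struct retire_ctx {
		struct delayed_work work;
	};

	static void retire_tick(struct work_struct *work)
	{
		struct retire_ctx *ctx =
			container_of(work, typeof(*ctx), work.work);

		/* ... retire completed requests here ... */

		/* Re-arm: roughly one pass per second while unparked. */
		schedule_delayed_work(&ctx->work,
				      round_jiffies_up_relative(HZ));
	}

	static void retire_init(struct retire_ctx *ctx)
	{
		INIT_DELAYED_WORK(&ctx->work, retire_tick);
	}

	static void retire_park(struct retire_ctx *ctx)
	{
		/*
		 * Note: cancel_delayed_work() does not flush, so a
		 * concurrently running handler may re-arm once more;
		 * use cancel_delayed_work_sync() where that matters.
		 */
		cancel_delayed_work(&ctx->work);
	}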

* Re: [PATCH 22/30] drm/i915: Remove logical HW ID
  2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
@ 2019-10-02 16:07   ` Tvrtko Ursulin
  0 siblings, 0 replies; 50+ messages in thread
From: Tvrtko Ursulin @ 2019-10-02 16:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/10/2019 12:19, Chris Wilson wrote:
> With the introduction of ctx->engines[] we allow multiple logical
> contexts to be used on the same engine (e.g. with virtual engines).
> According to bspec, each logical context requires a unique tag in order
> for context-switching to occur correctly between them. [Simple
> experiments show that it is not so easy to trick the HW into performing
> a lite-restore with matching logical IDs, though my memory of early
> Broadwell experiments does suggest that it should be generating
> lite-restores.]
> 
> We only need to keep a unique tag for the active lifetime of the
> context, and for as long as we need to identify that context. The HW
> uses the tag to determine if it should use a lite-restore (why not the
> LRCA?) and passes the tag back in various status identifiers. The only
> status we need to track is for OA, so when using perf, we assign the
> specific context a unique tag.
> 
> v2: Calculate required number of tags to fill ELSP.
> 
> Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Acked-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 147 ------------------
>   drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
>   .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
>   .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
>   .../gpu/drm/i915/gem/selftests/mock_context.c |   8 -
>   drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |   4 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  32 ++--
>   drivers/gpu/drm/i915/gvt/kvmgt.c              |  17 --
>   drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.c         |   7 +-
>   drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
>   drivers/gpu/drm/i915/i915_perf.c              |  25 ++-
>   drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
>   .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
>   drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
>   16 files changed, 51 insertions(+), 284 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index e832238a72e5..4e59b809d901 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -168,97 +168,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
>   	return i915_gem_context_get_engine(ctx, idx);
>   }
>   
> -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
> -{
> -	unsigned int max;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	if (INTEL_GEN(i915) >= 12)
> -		max = GEN12_MAX_CONTEXT_HW_ID;
> -	else if (INTEL_GEN(i915) >= 11)
> -		max = GEN11_MAX_CONTEXT_HW_ID;
> -	else if (USES_GUC_SUBMISSION(i915))
> -		/*
> -		 * When using GuC in proxy submission, GuC consumes the
> -		 * highest bit in the context id to indicate proxy submission.
> -		 */
> -		max = MAX_GUC_CONTEXT_HW_ID;
> -	else
> -		max = MAX_CONTEXT_HW_ID;
> -
> -	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
> -}
> -
> -static int steal_hw_id(struct drm_i915_private *i915)
> -{
> -	struct i915_gem_context *ctx, *cn;
> -	LIST_HEAD(pinned);
> -	int id = -ENOSPC;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	list_for_each_entry_safe(ctx, cn,
> -				 &i915->contexts.hw_id_list, hw_id_link) {
> -		if (atomic_read(&ctx->hw_id_pin_count)) {
> -			list_move_tail(&ctx->hw_id_link, &pinned);
> -			continue;
> -		}
> -
> -		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
> -		list_del_init(&ctx->hw_id_link);
> -		id = ctx->hw_id;
> -		break;
> -	}
> -
> -	/*
> -	 * Remember how far we got up on the last repossesion scan, so the
> -	 * list is kept in a "least recently scanned" order.
> -	 */
> -	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
> -	return id;
> -}
> -
> -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
> -{
> -	int ret;
> -
> -	lockdep_assert_held(&i915->contexts.mutex);
> -
> -	/*
> -	 * We prefer to steal/stall ourselves and our users over that of the
> -	 * entire system. That may be a little unfair to our users, and
> -	 * even hurt high priority clients. The choice is whether to oomkill
> -	 * something else, or steal a context id.
> -	 */
> -	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> -	if (unlikely(ret < 0)) {
> -		ret = steal_hw_id(i915);
> -		if (ret < 0) /* once again for the correct errno code */
> -			ret = new_hw_id(i915, GFP_KERNEL);
> -		if (ret < 0)
> -			return ret;
> -	}
> -
> -	*out = ret;
> -	return 0;
> -}
> -
> -static void release_hw_id(struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_private *i915 = ctx->i915;
> -
> -	if (list_empty(&ctx->hw_id_link))
> -		return;
> -
> -	mutex_lock(&i915->contexts.mutex);
> -	if (!list_empty(&ctx->hw_id_link)) {
> -		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
> -		list_del_init(&ctx->hw_id_link);
> -	}
> -	mutex_unlock(&i915->contexts.mutex);
> -}
> -
>   static void __free_engines(struct i915_gem_engines *e, unsigned int count)
>   {
>   	while (count--) {
> @@ -313,8 +222,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
> -	release_hw_id(ctx);
> -
>   	free_engines(rcu_access_pointer(ctx->engines));
>   	mutex_destroy(&ctx->engines_mutex);
>   
> @@ -459,12 +366,6 @@ static void context_close(struct i915_gem_context *ctx)
>   
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   
> -	/*
> -	 * This context will never again be assinged to HW, so we can
> -	 * reuse its ID for the next context.
> -	 */
> -	release_hw_id(ctx);
> -
>   	/*
>   	 * The LUT uses the VMA as a backpointer to unref the object,
>   	 * so we need to clear the LUT before we close all the VMA (inside
> @@ -507,7 +408,6 @@ __create_context(struct drm_i915_private *i915)
>   	RCU_INIT_POINTER(ctx->engines, e);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->hw_id_link);
>   
>   	/* NB: Mark all slices as needing a remap so that when the context first
>   	 * loads it will restore whatever remap state already exists. If there
> @@ -676,18 +576,11 @@ struct i915_gem_context *
>   i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   {
>   	struct i915_gem_context *ctx;
> -	int err;
>   
>   	ctx = i915_gem_create_context(i915, 0);
>   	if (IS_ERR(ctx))
>   		return ctx;
>   
> -	err = i915_gem_context_pin_hw_id(ctx);
> -	if (err) {
> -		destroy_kernel_context(&ctx);
> -		return ERR_PTR(err);
> -	}
> -
>   	i915_gem_context_clear_bannable(ctx);
>   	i915_gem_context_set_persistence(ctx);
>   	ctx->sched.priority = I915_USER_PRIORITY(prio);
> @@ -727,15 +620,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
>   		DRM_ERROR("Failed to create default global context\n");
>   		return PTR_ERR(ctx);
>   	}
> -	/*
> -	 * For easy recognisability, we want the kernel context to be 0 and then
> -	 * all user contexts will have non-zero hw_id. Kernel contexts are
> -	 * permanently pinned, so that we never suffer a stall and can
> -	 * use them from any allocation context (e.g. for evicting other
> -	 * contexts and from inside the shrinker).
> -	 */
> -	GEM_BUG_ON(ctx->hw_id);
> -	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
>   	dev_priv->kernel_context = ctx;
>   
>   	DRM_DEBUG_DRIVER("%s context support initialized\n",
> @@ -749,10 +633,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
>   	destroy_kernel_context(&i915->kernel_context);
> -
> -	/* Must free all deferred contexts (via flush_workqueue) first */
> -	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
> -	ida_destroy(&i915->contexts.hw_ida);
>   }
>   
>   static int context_idr_cleanup(int id, void *p, void *data)
> @@ -2438,33 +2318,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
>   	return ret;
>   }
>   
> -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> -{
> -	struct drm_i915_private *i915 = ctx->i915;
> -	int err = 0;
> -
> -	mutex_lock(&i915->contexts.mutex);
> -
> -	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
> -
> -	if (list_empty(&ctx->hw_id_link)) {
> -		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
> -
> -		err = assign_hw_id(i915, &ctx->hw_id);
> -		if (err)
> -			goto out_unlock;
> -
> -		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
> -	}
> -
> -	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
> -	atomic_inc(&ctx->hw_id_pin_count);
> -
> -out_unlock:
> -	mutex_unlock(&i915->contexts.mutex);
> -	return err;
> -}
> -
>   /* GEM context-engines iterator: for_each_gem_engine() */
>   struct intel_context *
>   i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index e0f5b6c6a331..1b0df53436cf 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -127,21 +127,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
>   	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
>   }
>   
> -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
> -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
> -{
> -	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
> -		return 0;
> -
> -	return __i915_gem_context_pin_hw_id(ctx);
> -}
> -
> -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
> -{
> -	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
> -	atomic_dec(&ctx->hw_id_pin_count);
> -}
> -
>   static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
>   {
>   	return !ctx->file_priv;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index daf1ea5075a6..6419da7c9f90 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -148,24 +148,6 @@ struct i915_gem_context {
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   #define CONTEXT_USER_ENGINES		3
>   
> -	/**
> -	 * @hw_id: - unique identifier for the context
> -	 *
> -	 * The hardware needs to uniquely identify the context for a few
> -	 * functions like fault reporting, PASID, scheduling. The
> -	 * &drm_i915_private.context_hw_ida is used to assign a unique
> -	 * id for the lifetime of the context.
> -	 *
> -	 * @hw_id_pin_count: - number of times this context had been pinned
> -	 * for use (should be, at most, once per engine).
> -	 *
> -	 * @hw_id_link: - all contexts with an assigned id are tracked
> -	 * for possible repossession.
> -	 */
> -	unsigned int hw_id;
> -	atomic_t hw_id_pin_count;
> -	struct list_head hw_id_link;
> -
>   	struct mutex mutex;
>   
>   	struct i915_sched_attr sched;
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index 2288757808ae..2fb31ada2fa7 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       engine->name, ctx->hw_id,
> +				       engine->name,
>   				       yesno(!!ctx->vm), err);
>   				intel_context_put(ce);
>   				kernel_context_close(ctx);
> @@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       engine->name, ctx->hw_id,
> +				       engine->name,
>   				       yesno(!!ctx->vm), err);
>   				intel_context_put(ce);
>   				kernel_context_close(ctx);
> @@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg)
>   
>   			err = gpu_fill(ce, obj, dw);
>   			if (err) {
> -				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
>   				       ndwords, dw, max_dwords(obj),
> -				       ce->engine->name, ctx->hw_id,
> -				       yesno(!!ctx->vm), err);
> +				       ce->engine->name, yesno(!!ctx->vm), err);
>   				i915_gem_context_unlock_engines(ctx);
>   				goto out_unlock;
>   			}
> diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> index 0d1a275ec316..ebc46f098561 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
>   {
>   	struct i915_gem_context *ctx;
>   	struct i915_gem_engines *e;
> -	int ret;
>   
>   	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>   	if (!ctx)
> @@ -32,13 +31,8 @@ mock_context(struct drm_i915_private *i915,
>   	RCU_INIT_POINTER(ctx->engines, e);
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->hw_id_link);
>   	mutex_init(&ctx->mutex);
>   
> -	ret = i915_gem_context_pin_hw_id(ctx);
> -	if (ret < 0)
> -		goto err_engines;
> -
>   	if (name) {
>   		struct i915_ppgtt *ppgtt;
>   
> @@ -56,8 +50,6 @@ mock_context(struct drm_i915_private *i915,
>   
>   	return ctx;
>   
> -err_engines:
> -	free_engines(rcu_access_pointer(ctx->engines));
>   err_free:
>   	kfree(ctx);
>   	return NULL;
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index bf9cedfccbf0..6959b05ae5f8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -58,6 +58,7 @@ struct intel_context {
>   
>   	u32 *lrc_reg_state;
>   	u64 lrc_desc;
> +	u32 tag; /* cookie passed to HW to track this context on submission */
>   
>   	unsigned int active_count; /* protected by timeline->mutex */
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 25af66540a1b..ad3be2fbd71a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -301,10 +301,12 @@ struct intel_engine_cs {
>   	u8 uabi_class;
>   	u8 uabi_instance;
>   
> +	u32 uabi_capabilities;
>   	u32 context_size;
>   	u32 mmio_base;
>   
> -	u32 uabi_capabilities;
> +	unsigned int context_tag;
> +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
>   
>   	struct rb_node uabi_node;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 6dcceaf02e00..890cd9ab07a0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -443,12 +443,8 @@ assert_priority_queue(const struct i915_request *prev,
>   static u64
>   lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   {
> -	struct i915_gem_context *ctx = ce->gem_context;
>   	u64 desc;
>   
> -	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
> -	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
> -
>   	desc = INTEL_LEGACY_32B_CONTEXT;
>   	if (i915_vm_is_4lvl(ce->vm))
>   		desc = INTEL_LEGACY_64B_CONTEXT;
> @@ -466,20 +462,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
>   	 * anything below.
>   	 */
>   	if (INTEL_GEN(engine->i915) >= 11) {
> -		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
> -		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
> -								/* bits 37-47 */
> -
>   		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
>   								/* bits 48-53 */
>   
> -		/* TODO: decide what to do with SW counter (bits 55-60) */
> -
>   		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
>   								/* bits 61-63 */
> -	} else {
> -		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
> -		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
>   	}
>   
>   	return desc;
> @@ -988,6 +975,18 @@ __execlists_schedule_in(struct i915_request *rq)
>   
>   	intel_context_get(ce);
>   
> +	if (ce->tag) {
> +		/* Use a fixed tag for OA and friends */
> +		ce->lrc_desc |= (u64)ce->tag << 32;
> +	} else {
> +		/* We don't need a strict matching tag, just different values */
> +		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
> +		ce->lrc_desc |=
> +			(u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
> +			GEN11_SW_CTX_ID_SHIFT;
> +		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
> +	}
> +
>   	intel_gt_pm_get(engine->gt);
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
>   	intel_engine_context_in(engine);
> @@ -2235,7 +2234,6 @@ static void execlists_context_unpin(struct intel_context *ce)
>   	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
>   		      ce->engine);
>   
> -	i915_gem_context_unpin_hw_id(ce->gem_context);
>   	i915_gem_object_unpin_map(ce->state->obj);
>   	intel_ring_reset(ce->ring, ce->ring->tail);
>   }
> @@ -2285,18 +2283,12 @@ __execlists_context_pin(struct intel_context *ce,
>   		goto unpin_active;
>   	}
>   
> -	ret = i915_gem_context_pin_hw_id(ce->gem_context);
> -	if (ret)
> -		goto unpin_map;
> -
>   	ce->lrc_desc = lrc_descriptor(ce, engine);
>   	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
>   	__execlists_update_reg_state(ce, engine);
>   
>   	return 0;
>   
> -unpin_map:
> -	i915_gem_object_unpin_map(ce->state->obj);
>   unpin_active:
>   	intel_context_active_release(ce);
>   err:
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 343d79c1cb7e..04a5a0d90823 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
>   	return sprintf(buf, "\n");
>   }
>   
> -static ssize_t
> -hw_id_show(struct device *dev, struct device_attribute *attr,
> -	   char *buf)
> -{
> -	struct mdev_device *mdev = mdev_from_dev(dev);
> -
> -	if (mdev) {
> -		struct intel_vgpu *vgpu = (struct intel_vgpu *)
> -			mdev_get_drvdata(mdev);
> -		return sprintf(buf, "%u\n",
> -			       vgpu->submission.shadow[0]->gem_context->hw_id);
> -	}
> -	return sprintf(buf, "\n");
> -}
> -
>   static DEVICE_ATTR_RO(vgpu_id);
> -static DEVICE_ATTR_RO(hw_id);
>   
>   static struct attribute *intel_vgpu_attrs[] = {
>   	&dev_attr_vgpu_id.attr,
> -	&dev_attr_hw_id.attr,
>   	NULL
>   };
>   
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 15c7cc3a23b3..454de27c4912 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1507,9 +1507,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   		struct intel_context *ce;
>   
>   		seq_puts(m, "HW context ");
> -		if (!list_empty(&ctx->hw_id_link))
> -			seq_printf(m, "%x [pin %u]", ctx->hw_id,
> -				   atomic_read(&ctx->hw_id_pin_count));
>   		if (ctx->pid) {
>   			struct task_struct *task;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a86d28bda6dd..47239df653f2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
>   				const char *header,
>   				const struct drm_i915_error_context *ctx)
>   {
> -	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
> -		   header, ctx->comm, ctx->pid, ctx->hw_id,
> -		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
> +	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
> +		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
> +		   ctx->guilty, ctx->active);
>   }
>   
>   static void error_print_engine(struct drm_i915_error_state_buf *m,
> @@ -1262,7 +1262,6 @@ static bool record_context(struct drm_i915_error_context *e,
>   		rcu_read_unlock();
>   	}
>   
> -	e->hw_id = ctx->hw_id;
>   	e->sched_attr = ctx->sched;
>   	e->guilty = atomic_read(&ctx->guilty_count);
>   	e->active = atomic_read(&ctx->active_count);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index caaed5093d95..4dc36d6ee3a2 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -117,7 +117,6 @@ struct i915_gpu_state {
>   		struct drm_i915_error_context {
>   			char comm[TASK_COMM_LEN];
>   			pid_t pid;
> -			u32 hw_id;
>   			int active;
>   			int guilty;
>   			struct i915_sched_attr sched_attr;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 80055501eccb..ecfbc37b738b 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		} else {
>   			stream->specific_ctx_id_mask =
>   				(1U << GEN8_CTX_ID_WIDTH) - 1;
> -			stream->specific_ctx_id =
> -				upper_32_bits(ce->lrc_desc);
> -			stream->specific_ctx_id &=
> -				stream->specific_ctx_id_mask;
> +			stream->specific_ctx_id = stream->specific_ctx_id_mask;
>   		}
>   		break;
>   
>   	case 11:
>   	case 12: {
>   		stream->specific_ctx_id_mask =
> -			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
> -			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
> -			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
> -		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
> -		stream->specific_ctx_id &=
> -			stream->specific_ctx_id_mask;
> +			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> +		stream->specific_ctx_id = stream->specific_ctx_id_mask;
>   		break;
>   	}
>   
> @@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		MISSING_CASE(INTEL_GEN(i915));
>   	}
>   
> +	ce->tag = stream->specific_ctx_id_mask;
> +
>   	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
>   			 stream->specific_ctx_id,
>   			 stream->specific_ctx_id_mask);
> @@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
>   {
>   	struct intel_context *ce;
>   
> -	stream->specific_ctx_id = INVALID_CTX_ID;
> -	stream->specific_ctx_id_mask = 0;
> -
>   	ce = fetch_and_zero(&stream->pinned_ctx);
> -	if (ce)
> +	if (ce) {
> +		ce->tag = 0; /* recomputed on next submission after parking */
>   		intel_context_unpin(ce);
> +	}
> +
> +	stream->specific_ctx_id = INVALID_CTX_ID;
> +	stream->specific_ctx_id_mask = 0;
>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 24f2944da09d..1f2cf6cfafb5 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
>   			   __entry->flags = flags;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> -		      __entry->flags)
> +		      __entry->ctx, __entry->seqno, __entry->flags)
>   );
>   
>   DECLARE_EVENT_CLASS(i915_request,
> @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
>   			   __entry->seqno = rq->fence.seqno;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno)
> +		      __entry->ctx, __entry->seqno)
>   );
>   
>   DEFINE_EVENT(i915_request, i915_request_add,
> @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
>   			   __entry->port = port;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> +		      __entry->ctx, __entry->seqno,
>   		      __entry->prio, __entry->port)
>   );
>   
> @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
>   
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
>   			   __entry->completed = i915_request_completed(rq);
>   			   ),
>   
> -		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
> +		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
>   			      __entry->dev, __entry->class, __entry->instance,
> -			      __entry->hw_id, __entry->ctx, __entry->seqno,
> -			      __entry->completed)
> +			      __entry->ctx, __entry->seqno, __entry->completed)
>   );
>   
>   #else
> @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
>   
>   	    TP_STRUCT__entry(
>   			     __field(u32, dev)
> -			     __field(u32, hw_id)
>   			     __field(u64, ctx)
>   			     __field(u16, class)
>   			     __field(u16, instance)
> @@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
>   	     */
>   	    TP_fast_assign(
>   			   __entry->dev = rq->i915->drm.primary->index;
> -			   __entry->hw_id = rq->gem_context->hw_id;
>   			   __entry->class = rq->engine->uabi_class;
>   			   __entry->instance = rq->engine->uabi_instance;
>   			   __entry->ctx = rq->fence.context;
> @@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
>   			   __entry->flags = flags;
>   			   ),
>   
> -	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
> +	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
>   		      __entry->dev, __entry->class, __entry->instance,
> -		      __entry->hw_id, __entry->ctx, __entry->seqno,
> +		      __entry->ctx, __entry->seqno,
>   		      __entry->flags)
>   );
>   
> @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
>   	TP_STRUCT__entry(
>   			__field(u32, dev)
>   			__field(struct i915_gem_context *, ctx)
> -			__field(u32, hw_id)
>   			__field(struct i915_address_space *, vm)
>   	),
>   
>   	TP_fast_assign(
>   			__entry->dev = ctx->i915->drm.primary->index;
>   			__entry->ctx = ctx;
> -			__entry->hw_id = ctx->hw_id;
>   			__entry->vm = ctx->vm;
>   	),
>   
> -	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
> -		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
> +	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
> +		  __entry->dev, __entry->ctx, __entry->vm)
>   )
>   
>   DEFINE_EVENT(i915_context, i915_context_create,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index 52d2df843148..f39f0282e78c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg)
>   			if (IS_ERR(rq)) {
>   				/* When full, fail_if_busy will trigger EBUSY */
>   				if (PTR_ERR(rq) != -EBUSY) {
> -					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
> -					       ctx->hw_id, engine->name,
> +					pr_err("Unexpected error from request alloc (on %s): %d\n",
> +					       engine->name,
>   					       (int)PTR_ERR(rq));
>   					err = PTR_ERR(rq);
>   				}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index 1c9db08f7c28..ac1ff558eb90 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
>   		}
>   
>   		nc = 0;
> -		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
> +		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
>   			for (; nc < num_ctx; nc++) {
>   				ctx = mock_context(i915, "mock");
>   				if (!ctx)
> 

I don't understand the OA part, but for the rest:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (31 preceding siblings ...)
  2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-10-02 20:16 ` Patchwork
  2019-10-02 20:29 ` ✗ Fi.CI.SPARSE: " Patchwork
  2019-10-02 20:41 ` ✗ Fi.CI.BAT: failure " Patchwork
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 20:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
5ab21c1fa058 drm/i915/selftests: Exercise potential false lite-restore
0322b52184d0 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
-:14: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked with dma_fence_signal")'
#14: 
See also 0fc89b6802ba ("dma-fence: Simply wrap dma_fence_signal_locked

total: 1 errors, 0 warnings, 0 checks, 24 lines checked
1a05d810f79f drm/mm: Pack allocated/scanned boolean into a bitfield
931f20d41378 drm/i915: Only track bound elements of the GTT
2a6d4f282fa6 drm/i915: Mark up address spaces that may need to allocate
be8578d54763 drm/i915: Pull i915_vma_pin under the vm->mutex
4404006876ec drm/i915: Push the i915_active.retire into a worker
81d85386f107 drm/i915: Coordinate i915_active with its own mutex
-:1340: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#1340: FILE: drivers/gpu/drm/i915/i915_active_types.h:49:
+	struct mutex mutex;

total: 0 errors, 0 warnings, 1 checks, 1481 lines checked
f2e5937f521c drm/i915: Move idle barrier cleanup into engine-pm
a34a2a0ff4b7 drm/i915: Drop struct_mutex from around i915_retire_requests()
34cf8af39814 drm/i915: Remove the GEM idle worker
4bc0f23154b4 drm/i915: Merge wait_for_timelines with retire_request
ee8a183e05ce drm/i915/gem: Retire directly for mmap-offset shrinking
a23c0260a779 drm/i915: Move request runtime management onto gt
-:236: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#236: 
new file mode 100644

-:241: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#241: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:1:
+/*

-:242: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#242: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.c:2:
+ * SPDX-License-Identifier: MIT

-:370: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#370: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:1:
+/*

-:371: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#371: FILE: drivers/gpu/drm/i915/gt/intel_gt_requests.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 725 lines checked
f210329ac8c8 drm/i915: Move global activity tracking from GEM to GT
f452d4758807 drm/i915: Expose engine properties via sysfs
-:68: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#68: 
new file mode 100644

-:73: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#73: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:1:
+/*

-:74: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#74: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.c:2:
+ * SPDX-License-Identifier: MIT

-:198: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#198: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:1:
+/*

-:199: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#199: FILE: drivers/gpu/drm/i915/gt/intel_engine_sysfs.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 5 warnings, 0 checks, 158 lines checked
81490be650d7 drm/i915/execlists: Force preemption
bc5ab3030d38 drm/i915: Mark up "sentinel" requests
258d11346b6a drm/i915/execlists: Cancel banned contexts on schedule-out
b6e6bd3f3585 drm/i915: Cancel non-persistent contexts on close
-:38: WARNING:BAD_SIGN_OFF: Duplicate signature
#38: 
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>

-:299: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#299: 
new file mode 100644

-:304: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#304: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:1:
+/*

-:305: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#305: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:2:
+ * SPDX-License-Identifier: MIT

-:366: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#366: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:1:
+/*

-:367: WARNING:SPDX_LICENSE_TAG: Misplaced SPDX-License-Identifier tag - use line 1 instead
#367: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h:2:
+ * SPDX-License-Identifier: MIT

total: 0 errors, 6 warnings, 0 checks, 332 lines checked
128a9bd55770 drm/i915: Replace hangcheck by heartbeats
-:236: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#236: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:69:
+					  "%s heartbeat not ticking\n",

-:253: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#253: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:86:
+					      "stopped heartbeat on %s",

-:540: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#540: 
deleted file mode 100644

total: 0 errors, 3 warnings, 0 checks, 672 lines checked
42bd5709ec17 drm/i915: Remove logical HW ID
10e4db753ab1 drm/i915: Move context management under GEM
-:1061: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#1061: FILE: drivers/gpu/drm/i915/gt/intel_lrc.c:988:
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
                                   ^

total: 0 errors, 0 warnings, 1 checks, 1717 lines checked
1e0c926a87c1 drm/i915/overlay: Drop struct_mutex guard
de0dcfa1655b drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
711644d1d77a drm/i915: Remove struct_mutex guard for debugfs/opregion
024590a3e7fd drm/i915: Drop struct_mutex from suspend state save/restore
066f93946d1d drm/i915/selftests: Drop vestigal struct_mutex guards
76f85559693d drm/i915: Drop struct_mutex from around GEM initialisation
56d46847a471 drm/i915: Flush idle barriers when waiting


* ✗ Fi.CI.SPARSE: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (32 preceding siblings ...)
  2019-10-02 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2) Patchwork
@ 2019-10-02 20:29 ` Patchwork
  2019-10-02 20:41 ` ✗ Fi.CI.BAT: failure " Patchwork
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 20:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
URL   : https://patchwork.freedesktop.org/series/67483/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.6.0
Commit: drm/i915/selftests: Exercise potential false lite-restore
Okay!

Commit: dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
Okay!

Commit: drm/mm: Pack allocated/scanned boolean into a bitfield
Okay!

Commit: drm/i915: Only track bound elements of the GTT
Okay!

Commit: drm/i915: Mark up address spaces that may need to allocate
Okay!

Commit: drm/i915: Pull i915_vma_pin under the vm->mutex
Okay!

Commit: drm/i915: Push the i915_active.retire into a worker
Okay!

Commit: drm/i915: Coordinate i915_active with its own mutex
Okay!

Commit: drm/i915: Move idle barrier cleanup into engine-pm
Okay!

Commit: drm/i915: Drop struct_mutex from around i915_retire_requests()
Okay!

Commit: drm/i915: Remove the GEM idle worker
Okay!

Commit: drm/i915: Merge wait_for_timelines with retire_request
Okay!

Commit: drm/i915/gem: Retire directly for mmap-offset shrinking
Okay!

Commit: drm/i915: Move request runtime management onto gt
Okay!

Commit: drm/i915: Move global activity tracking from GEM to GT
Okay!

Commit: drm/i915: Expose engine properties via sysfs
Okay!

Commit: drm/i915/execlists: Force preemption
Okay!

Commit: drm/i915: Mark up "sentinel" requests
Okay!

Commit: drm/i915/execlists: Cancel banned contexts on schedule-out
Okay!

Commit: drm/i915: Cancel non-persistent contexts on close
Okay!

Commit: drm/i915: Replace hangcheck by heartbeats
Okay!

Commit: drm/i915: Remove logical HW ID
Okay!

Commit: drm/i915: Move context management under GEM
Okay!

Commit: drm/i915/overlay: Drop struct_mutex guard
Okay!

Commit: drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
Okay!


* ✗ Fi.CI.BAT: failure for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
  2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
                   ` (33 preceding siblings ...)
  2019-10-02 20:29 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-10-02 20:41 ` Patchwork
  34 siblings, 0 replies; 50+ messages in thread
From: Patchwork @ 2019-10-02 20:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2)
URL   : https://patchwork.freedesktop.org/series/67483/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6992 -> Patchwork_14633
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14633 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14633, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14633:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-bdw-gvtdvm:      [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-whl-u:           [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-whl-u/igt@i915_selftest@live_execlists.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-whl-u/igt@i915_selftest@live_execlists.html
    - fi-kbl-7500u:       NOTRUN -> [INCOMPLETE][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-kbl-7500u/igt@i915_selftest@live_execlists.html
    - fi-bsw-n3050:       [PASS][6] -> [INCOMPLETE][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
    - fi-bdw-5557u:       [PASS][8] -> [INCOMPLETE][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-5557u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - fi-bdw-gvtdvm:      NOTRUN -> [FAIL][10]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-gvtdvm/igt@runner@aborted.html
    - fi-cfl-8109u:       NOTRUN -> [FAIL][11]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-cfl-8109u/igt@runner@aborted.html
    - fi-kbl-7500u:       NOTRUN -> [FAIL][12]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-kbl-7500u/igt@runner@aborted.html
    - fi-whl-u:           NOTRUN -> [FAIL][13]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-whl-u/igt@runner@aborted.html
    - fi-bxt-dsi:         NOTRUN -> [FAIL][14]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bxt-dsi/igt@runner@aborted.html
    - fi-bsw-n3050:       NOTRUN -> [FAIL][15]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bsw-n3050/igt@runner@aborted.html
    - fi-bdw-5557u:       NOTRUN -> [FAIL][16]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bdw-5557u/igt@runner@aborted.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@runner@aborted:
    - {fi-cml-s}:         NOTRUN -> [FAIL][17]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-cml-s/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_14633 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_reloc@basic-write-read-noreloc:
    - fi-icl-u3:          [PASS][18] -> [DMESG-WARN][19] ([fdo#107724]) +2 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u3/igt@gem_exec_reloc@basic-write-read-noreloc.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u3/igt@gem_exec_reloc@basic-write-read-noreloc.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][20] -> [INCOMPLETE][21] ([fdo#107718])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-u2:          [PASS][22] -> [INCOMPLETE][23] ([fdo#107713])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u2/igt@i915_selftest@live_execlists.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u2/igt@i915_selftest@live_execlists.html
    - fi-icl-u3:          [PASS][24] -> [INCOMPLETE][25] ([fdo#107713])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u3/igt@i915_selftest@live_execlists.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u3/igt@i915_selftest@live_execlists.html
    - fi-bxt-dsi:         [PASS][26] -> [INCOMPLETE][27] ([fdo#103927])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-bxt-dsi/igt@i915_selftest@live_execlists.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-bxt-dsi/igt@i915_selftest@live_execlists.html
    - fi-cfl-8109u:       [PASS][28] -> [INCOMPLETE][29] ([fdo#110977])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-cfl-8109u/igt@i915_selftest@live_execlists.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-cfl-8109u/igt@i915_selftest@live_execlists.html

  
#### Possible fixes ####

  * igt@gem_exec_gttfill@basic:
    - fi-apl-guc:         [DMESG-WARN][30] ([fdo#109385] / [fdo#111652]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-apl-guc/igt@gem_exec_gttfill@basic.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-apl-guc/igt@gem_exec_gttfill@basic.html

  * igt@gem_mmap_gtt@basic:
    - fi-icl-u3:          [DMESG-WARN][32] ([fdo#107724]) -> [PASS][33] +1 similar issue
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6992/fi-icl-u3/igt@gem_mmap_gtt@basic.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/fi-icl-u3/igt@gem_mmap_gtt@basic.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109385]: https://bugs.freedesktop.org/show_bug.cgi?id=109385
  [fdo#110566]: https://bugs.freedesktop.org/show_bug.cgi?id=110566
  [fdo#110977]: https://bugs.freedesktop.org/show_bug.cgi?id=110977
  [fdo#111652]: https://bugs.freedesktop.org/show_bug.cgi?id=111652


Participating hosts (51 -> 44)
------------------------------

  Additional (1): fi-kbl-7500u 
  Missing    (8): fi-ilk-m540 fi-tgl-u fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6992 -> Patchwork_14633

  CI-20190529: 20190529
  CI_DRM_6992: 7116520f059e12d50596bdbe73b48ba8e8d50daf @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5210: 74f55119f9920b65996535210a09147997804136 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14633: 56d46847a471aeebbd34dd8d7dacf5847d312a5d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

56d46847a471 drm/i915: Flush idle barriers when waiting
76f85559693d drm/i915: Drop struct_mutex from around GEM initialisation
066f93946d1d drm/i915/selftests: Drop vestigal struct_mutex guards
024590a3e7fd drm/i915: Drop struct_mutex from suspend state save/restore
711644d1d77a drm/i915: Remove struct_mutex guard for debugfs/opregion
de0dcfa1655b drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info
1e0c926a87c1 drm/i915/overlay: Drop struct_mutex guard
10e4db753ab1 drm/i915: Move context management under GEM
42bd5709ec17 drm/i915: Remove logical HW ID
128a9bd55770 drm/i915: Replace hangcheck by heartbeats
b6e6bd3f3585 drm/i915: Cancel non-persistent contexts on close
258d11346b6a drm/i915/execlists: Cancel banned contexts on schedule-out
bc5ab3030d38 drm/i915: Mark up "sentinel" requests
81490be650d7 drm/i915/execlists: Force preemption
f452d4758807 drm/i915: Expose engine properties via sysfs
f210329ac8c8 drm/i915: Move global activity tracking from GEM to GT
a23c0260a779 drm/i915: Move request runtime management onto gt
ee8a183e05ce drm/i915/gem: Retire directly for mmap-offset shrinking
4bc0f23154b4 drm/i915: Merge wait_for_timelines with retire_request
34cf8af39814 drm/i915: Remove the GEM idle worker
a34a2a0ff4b7 drm/i915: Drop struct_mutex from around i915_retire_requests()
f2e5937f521c drm/i915: Move idle barrier cleanup into engine-pm
81d85386f107 drm/i915: Coordinate i915_active with its own mutex
4404006876ec drm/i915: Push the i915_active.retire into a worker
be8578d54763 drm/i915: Pull i915_vma_pin under the vm->mutex
2a6d4f282fa6 drm/i915: Mark up address spaces that may need to allocate
931f20d41378 drm/i915: Only track bound elements of the GTT
1a05d810f79f drm/mm: Pack allocated/scanned boolean into a bitfield
0322b52184d0 dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling)
5ab21c1fa058 drm/i915/selftests: Exercise potential false lite-restore

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14633/index.html

Thread overview: 50+ messages
2019-10-02 11:19 [PATCH 01/30] drm/i915/selftests: Exercise potential false lite-restore Chris Wilson
2019-10-02 11:19 ` [PATCH 02/30] dma-fence: Serialise signal enabling (dma_fence_enable_sw_signaling) Chris Wilson
2019-10-02 11:19 ` [PATCH 03/30] drm/mm: Pack allocated/scanned boolean into a bitfield Chris Wilson
2019-10-02 11:19 ` [PATCH 04/30] drm/i915: Only track bound elements of the GTT Chris Wilson
2019-10-02 11:19 ` [PATCH 05/30] drm/i915: Mark up address spaces that may need to allocate Chris Wilson
2019-10-02 15:47   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 06/30] drm/i915: Pull i915_vma_pin under the vm->mutex Chris Wilson
2019-10-02 11:19 ` [PATCH 07/30] drm/i915: Push the i915_active.retire into a worker Chris Wilson
2019-10-02 11:19 ` [PATCH 08/30] drm/i915: Coordinate i915_active with its own mutex Chris Wilson
2019-10-02 15:49   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 09/30] drm/i915: Move idle barrier cleanup into engine-pm Chris Wilson
2019-10-02 11:19 ` [PATCH 10/30] drm/i915: Drop struct_mutex from around i915_retire_requests() Chris Wilson
2019-10-02 11:19 ` [PATCH 11/30] drm/i915: Remove the GEM idle worker Chris Wilson
2019-10-02 11:19 ` [PATCH 12/30] drm/i915: Merge wait_for_timelines with retire_request Chris Wilson
2019-10-02 11:19 ` [PATCH 13/30] drm/i915/gem: Retire directly for mmap-offset shrinking Chris Wilson
2019-10-02 15:53   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 14/30] drm/i915: Move request runtime management onto gt Chris Wilson
2019-10-02 15:58   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 15/30] drm/i915: Move global activity tracking from GEM to GT Chris Wilson
2019-10-02 11:19 ` [PATCH 16/30] drm/i915: Expose engine properties via sysfs Chris Wilson
2019-10-02 11:19 ` [PATCH 17/30] drm/i915/execlists: Force preemption Chris Wilson
2019-10-02 11:19 ` [PATCH 18/30] drm/i915: Mark up "sentinel" requests Chris Wilson
2019-10-02 11:19 ` [PATCH 19/30] drm/i915/execlists: Cancel banned contexts on schedule-out Chris Wilson
2019-10-02 11:19 ` [PATCH 20/30] drm/i915: Cancel non-persistent contexts on close Chris Wilson
2019-10-02 13:52   ` Bloomfield, Jon
2019-10-02 14:05     ` Chris Wilson
2019-10-02 14:23     ` Chris Wilson
2019-10-02 14:40       ` Bloomfield, Jon
2019-10-02 14:23   ` [PATCH v2] " Chris Wilson
2019-10-02 14:36     ` Bloomfield, Jon
2019-10-02 11:19 ` [PATCH 21/30] drm/i915: Replace hangcheck by heartbeats Chris Wilson
2019-10-02 13:55   ` Bloomfield, Jon
2019-10-02 14:28     ` Chris Wilson
2019-10-02 11:19 ` [PATCH 22/30] drm/i915: Remove logical HW ID Chris Wilson
2019-10-02 16:07   ` Tvrtko Ursulin
2019-10-02 11:19 ` [PATCH 23/30] drm/i915: Move context management under GEM Chris Wilson
2019-10-02 11:19 ` [PATCH 24/30] drm/i915/overlay: Drop struct_mutex guard Chris Wilson
2019-10-02 11:19 ` [PATCH 25/30] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info Chris Wilson
2019-10-02 11:19 ` [PATCH 26/30] drm/i915: Remove struct_mutex guard for debugfs/opregion Chris Wilson
2019-10-02 11:19 ` [PATCH 27/30] drm/i915: Drop struct_mutex from suspend state save/restore Chris Wilson
2019-10-02 11:19 ` [PATCH 28/30] drm/i915/selftests: Drop vestigal struct_mutex guards Chris Wilson
2019-10-02 11:19 ` [PATCH 29/30] drm/i915: Drop struct_mutex from around GEM initialisation Chris Wilson
2019-10-02 11:20 ` [PATCH 30/30] drm/i915: Flush idle barriers when waiting Chris Wilson
2019-10-02 13:21 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore Patchwork
2019-10-02 13:34 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-10-02 13:46 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-10-02 13:49   ` Chris Wilson
2019-10-02 20:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/30] drm/i915/selftests: Exercise potential false lite-restore (rev2) Patchwork
2019-10-02 20:29 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-10-02 20:41 ` ✗ Fi.CI.BAT: failure " Patchwork
