All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [Intel-gfx] [PATCH 07/11] drm/i915/selftests: Check recovery from corrupted LRC
Date: Tue, 25 Feb 2020 08:22:29 +0000	[thread overview]
Message-ID: <20200225082233.274530-7-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20200225082233.274530-1-chris@chris-wilson.co.uk>

Check that we can recover if the LRC is totally corrupted.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 135 +++++++++++++++++++++++++
 1 file changed, 135 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 810f7857ad26..d7f98aada626 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -5292,6 +5292,140 @@ static int live_lrc_isolation(void *arg)
 	return 0;
 }
 
+static void garbage_reset(struct intel_engine_cs *engine,
+			  struct i915_request *rq)
+{
+	const unsigned int bit = I915_RESET_ENGINE + engine->id;
+	unsigned long *lock = &engine->gt->reset.flags;
+	/* Claim this engine's reset bit; give up if it is already set. */
+	if (test_and_set_bit(bit, lock))
+		return;
+	/* Keep the execlists tasklet disabled around the reset. */
+	tasklet_disable(&engine->execlists.tasklet);
+	/* Skip the reset if the request's fence already has an error. */
+	if (!rq->fence.error)
+		intel_engine_reset(engine, NULL);
+
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(bit, lock);
+}
+/* Pin @ce, fill ce->lrc_reg_state with random bytes, submit a request. */
+static struct i915_request *garbage(struct intel_context *ce,
+				    struct rnd_state *prng)
+{
+	struct i915_request *rq;
+	int err;
+
+	err = intel_context_pin(ce);
+	if (err)
+		return ERR_PTR(err);
+	/* Randomise (context_size - LRC_STATE_PN pages) bytes of state. */
+	prandom_bytes_state(prng,
+			    ce->lrc_reg_state,
+			    ce->engine->context_size -
+			    LRC_STATE_PN * PAGE_SIZE);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+	/* Extra reference is for the caller; taken before queuing. */
+	i915_request_get(rq);
+	i915_request_add(rq);
+	return rq;
+	/* NOTE(review): return above leaves @ce pinned; confirm intended. */
+err_unpin:
+	intel_context_unpin(ce);
+	return ERR_PTR(err);
+}
+/* Corrupt a fresh context on @engine, reset the engine, check recovery. */
+static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+	struct intel_context *ce;
+	struct i915_request *hang;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	hang = garbage(ce, prng);
+	if (IS_ERR(hang)) {
+		err = PTR_ERR(hang);
+		goto err_ce;
+	}
+	/* Non-zero from wait_for_submit() is reported as -ETIME. */
+	if (wait_for_submit(engine, hang, HZ / 2)) {
+		i915_request_put(hang);
+		err = -ETIME;
+		goto err_ce;
+	}
+	/* Mark the context as banned before forcing an engine reset. */
+	intel_context_set_banned(ce);
+	garbage_reset(engine, hang);
+	/* The reset is expected to leave an error on the request's fence. */
+	intel_engine_flush_submission(engine);
+	if (!hang->fence.error) {
+		i915_request_put(hang);
+		pr_err("%s: corrupted context was not reset\n",
+		       engine->name);
+		err = -EINVAL;
+		goto err_ce;
+	}
+	/* Failing to wait for the request within HZ / 2 is an -EIO. */
+	if (i915_request_wait(hang, 0, HZ / 2) < 0) {
+		pr_err("%s: corrupted context did not recover\n",
+		       engine->name);
+		i915_request_put(hang);
+		err = -EIO;
+		goto err_ce;
+	}
+	i915_request_put(hang);
+
+err_ce:
+	intel_context_put(ce);
+	return err;
+}
+/* Selftest: run __lrc_garbage() up to 3 times per engine of gt @arg. */
+static int live_lrc_garbage(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * Verify that we can recover if one context state is completely
+	 * corrupted.
+	 */
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		I915_RND_STATE(prng);
+		int err = 0, i;
+		/* Skip when intel_has_reset_engine() reports no support. */
+		if (!intel_has_reset_engine(engine->gt))
+			continue;
+		/* Hold an engine-pm reference across the attempts. */
+		intel_engine_pm_get(engine);
+		for (i = 0; i < 3; i++) {
+			err = __lrc_garbage(engine, &prng);
+			if (err)
+				break;
+		}
+		intel_engine_pm_put(engine);
+		/* A failing igt_flush_test() turns the result into -EIO. */
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
 {
 	struct intel_context *ce;
@@ -5391,6 +5525,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_lrc_gpr),
 		SUBTEST(live_lrc_isolation),
 		SUBTEST(live_lrc_timestamp),
+		SUBTEST(live_lrc_garbage),
 		SUBTEST(live_pphwsp_runtime),
 	};
 
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-02-25  8:22 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-25  8:22 [Intel-gfx] [PATCH 01/11] drm/i915: Drop assertion that active->fence is unchanged Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 02/11] drm/i915: Manually acquire engine-wakeref around use of kernel_context Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 03/11] drm/i915/gt: Pull marking vm as closed underneath the vm->mutex Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 04/11] drm/i915/gem: Cleanup shadow batch after I915_EXEC_SECURE Chris Wilson
2020-02-25 11:15   ` Matthew Auld
2020-02-25  8:22 ` [Intel-gfx] [PATCH 05/11] drm/i915: Protect i915_request_await_start from early waits Chris Wilson
2020-02-25  9:26   ` [Intel-gfx] [PATCH] " Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 06/11] drm/i915/selftests: Verify LRC isolation Chris Wilson
2020-02-25  8:22 ` Chris Wilson [this message]
2020-02-25 18:00   ` [Intel-gfx] [PATCH 07/11] drm/i915/selftests: Check recovery from corrupted LRC Matthew Auld
2020-02-25  8:22 ` [Intel-gfx] [PATCH 08/11] drm/i915/selftests: Be a little more lenient for reset workers Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 09/11] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
2020-02-26 16:41   ` Tvrtko Ursulin
2020-02-26 17:06     ` Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 10/11] drm/i915/gt: Prevent allocation on a banned context Chris Wilson
2020-02-25  8:22 ` [Intel-gfx] [PATCH 11/11] drm/i915/gem: Check that the context wasn't closed during setup Chris Wilson
2020-02-25  8:31 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915: Drop assertion that active->fence is unchanged Patchwork
2020-02-25  8:57 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-02-25  9:57 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915: Drop assertion that active->fence is unchanged (rev2) Patchwork
2020-02-25 10:23 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-02-25 11:23 ` [Intel-gfx] [PATCH 01/11] drm/i915: Drop assertion that active->fence is unchanged Matthew Auld
2020-02-26 13:22 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [01/11] drm/i915: Drop assertion that active->fence is unchanged (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200225082233.274530-7-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.