All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michel Thierry <michel.thierry@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v8 10/20] drm/i915/selftests: reset engine self tests
Date: Mon, 22 May 2017 10:46:31 -0700	[thread overview]
Message-ID: <20170522174641.25354-11-michel.thierry@intel.com> (raw)
In-Reply-To: <20170522174641.25354-1-michel.thierry@intel.com>

Check that we can reset specific engines, also check the fallback to
full reset if something didn't work.

v2: rebase.
v3: use RESET_ENGINE_IN_PROGRESS flag.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 149 +++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index aa31d6c0cdfb..8a3edb8bd440 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -322,6 +322,56 @@ static int igt_global_reset(void *arg)
 	return err;
 }
 
+static int igt_reset_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int reset_count, reset_engine_count;
+	int err = 0;
+
+	/* Check that we can issue a global GPU and engine reset */
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	set_bit(I915_RESET_ENGINE_IN_PROGRESS, &i915->gpu_error.flags);
+
+	for_each_engine(engine, i915, id) {
+		reset_count = i915_reset_count(&i915->gpu_error);
+		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
+							     engine);
+
+		err = i915_reset_engine(engine);
+		if (err) {
+			pr_err("i915_reset_engine failed\n");
+			break;
+		}
+
+		if (i915_reset_count(&i915->gpu_error) != reset_count) {
+			pr_err("Full GPU reset recorded! (engine reset expected)\n");
+			err = -EINVAL;
+			break;
+		}
+
+		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
+		    reset_engine_count) {
+			pr_err("No %s engine reset recorded!\n", engine->name);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	clear_bit(I915_RESET_ENGINE_IN_PROGRESS, &i915->gpu_error.flags);
+	if (i915_terminally_wedged(&i915->gpu_error))
+		err = -EIO;
+
+	return err;
+}
+
 static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
 {
 	u32 reset_count;
@@ -526,13 +576,112 @@ static int igt_reset_queue(void *arg)
 	return err;
 }
 
+static int igt_render_engine_reset_fallback(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine = i915->engine[RCS];
+	struct hang h;
+	struct drm_i915_gem_request *rq;
+	unsigned int reset_count, reset_engine_count;
+	int err = 0;
+
+	/* Check that we can issue a global GPU and engine reset */
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
+
+	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = hang_init(&h, i915);
+	if (err)
+		goto unlock;
+
+	rq = hang_create_request(&h, engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto fini;
+	}
+
+	i915_gem_request_get(rq);
+	__i915_add_request(rq, true);
+
+	/* make reset engine fail */
+	rq->fence.error = -EIO;
+
+	if (!wait_for_hang(&h, rq)) {
+		pr_err("Failed to start request %x\n", rq->fence.seqno);
+		err = -EIO;
+		goto fini;
+	}
+
+	reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine);
+	reset_count = fake_hangcheck(rq);
+
+	/* unlock since we'll call handle_error */
+	mutex_unlock(&i915->drm.struct_mutex);
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+
+	i915_handle_error(i915, intel_engine_flag(engine), "live test");
+
+	if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+	    reset_engine_count) {
+		pr_err("render engine reset recorded! (full reset expected)\n");
+		err = -EINVAL;
+		goto fini;
+	}
+
+	if (i915_reset_count(&i915->gpu_error) == reset_count) {
+		pr_err("No full GPU reset recorded!\n");
+		err = -EINVAL;
+		goto fini;
+	}
+
+	/*
+	 * by using fence.error = -EIO, full reset sets the wedged flag, do one
+	 * more full reset to re-enable the hw.
+	 */
+	if (i915_terminally_wedged(&i915->gpu_error)) {
+		set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+		mutex_lock(&i915->drm.struct_mutex);
+		rq->fence.error = 0;
+
+		set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
+		i915_reset(i915);
+		GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
+				    &i915->gpu_error.flags));
+
+		if (i915_reset_count(&i915->gpu_error) == reset_count) {
+			pr_err("No full GPU reset recorded!\n");
+			err = -EINVAL;
+			goto fini;
+		}
+	}
+
+fini:
+	hang_fini(&h);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
+
+	if (i915_terminally_wedged(&i915->gpu_error))
+		return -EIO;
+
+	return err;
+}
+
 int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_hang_sanitycheck),
 		SUBTEST(igt_global_reset),
+		SUBTEST(igt_reset_engine),
 		SUBTEST(igt_wait_reset),
 		SUBTEST(igt_reset_queue),
+		SUBTEST(igt_render_engine_reset_fallback),
 	};
 
 	if (!intel_has_gpu_reset(i915))
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2017-05-22 17:46 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-22 17:46 [PATCH v8 00/20] Gen8+ engine-reset Michel Thierry
2017-05-22 17:46 ` [PATCH v8 01/20] drm/i915: Look for active requests earlier in the reset path Michel Thierry
2017-05-22 17:46 ` [PATCH v8 02/20] drm/i915: Update i915.reset to handle engine resets Michel Thierry
2017-05-22 17:46 ` [PATCH v8 03/20] drm/i915: Modify error handler for per engine hang recovery Michel Thierry
2017-06-02 20:16   ` Chris Wilson
2017-06-02 20:38     ` Michel Thierry
2017-06-02 21:29       ` Chris Wilson
2017-06-04 11:47     ` Chris Wilson
2017-06-04 12:06       ` Chris Wilson
2017-06-06  0:40         ` Michel Thierry
2017-06-06 10:16           ` Chris Wilson
2017-06-06 18:51             ` Michel Thierry
2017-06-06  0:45   ` [PATCH v9] " Michel Thierry
2017-06-06 13:03     ` kbuild test robot
2017-06-06 13:05     ` kbuild test robot
2017-05-22 17:46 ` [PATCH v8 04/20] drm/i915: Add support for per engine reset recovery Michel Thierry
2017-05-22 17:46 ` [PATCH v8 05/20] drm/i915: Add engine reset count to error state Michel Thierry
2017-05-22 17:46 ` [PATCH v8 06/20] drm/i915: Export per-engine reset count info to debugfs Michel Thierry
2017-05-22 17:46 ` [PATCH v8 07/20] drm/i915: Carry on with reset even if hw engine is not ready Michel Thierry
2017-05-22 17:46 ` [PATCH v8 08/20] drm/i915: Enable Engine reset and recovery support Michel Thierry
2017-05-22 17:46 ` [PATCH v8 09/20] drm/i915: Add engine reset count in get-reset-stats ioctl Michel Thierry
2017-05-22 17:46 ` Michel Thierry [this message]
2017-05-22 17:46 ` [PATCH v8 11/20] drm/i915/guc: fix mmio whitelist mmio_start offset and add reminder Michel Thierry
2017-05-22 17:46 ` [PATCH v8 12/20] drm/i915/guc: Provide register list to be saved/restored during engine reset Michel Thierry
2017-05-22 17:46 ` [PATCH v8 13/20] drm/i915/guc: Rename the function that resets the GuC Michel Thierry
2017-05-22 17:46 ` [PATCH v8 14/20] drm/i915/guc: Add support for reset engine using GuC commands Michel Thierry
2017-05-22 17:46 ` [PATCH v8 15/20] drm/i915: Watchdog timeout: Pass GuC shared data structure during param load Michel Thierry
2017-05-22 17:46 ` [PATCH v8 16/20] drm/i915: Watchdog timeout: IRQ handler for gen8+ Michel Thierry
2017-05-22 17:46 ` [PATCH v8 17/20] drm/i915: Watchdog timeout: Ringbuffer command emission " Michel Thierry
2017-05-22 17:46 ` [PATCH v8 18/20] drm/i915: Watchdog timeout: DRM kernel interface to set the timeout Michel Thierry
2017-05-22 17:46 ` [PATCH v8 19/20] drm/i915: Watchdog timeout: Include threshold value in error state Michel Thierry
2017-05-22 17:46 ` [PATCH v8 20/20] drm/i915: Watchdog timeout: Export media reset count from GuC to debugfs Michel Thierry
2017-05-22 18:04 ` ✓ Fi.CI.BAT: success for Gen8+ engine-reset (rev11) Patchwork
2017-06-06  1:02 ` ✗ Fi.CI.BAT: failure for Gen8+ engine-reset (rev12) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170522174641.25354-11-michel.thierry@intel.com \
    --to=michel.thierry@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.