All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset()
@ 2024-04-18 17:10 Nirmoy Das
  2024-04-18 17:10 ` [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover Nirmoy Das
                   ` (6 more replies)
  0 siblings, 7 replies; 13+ messages in thread
From: Nirmoy Das @ 2024-04-18 17:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, John.C.Harrison, Nirmoy Das

__intel_gt_reset() is really for resetting engines though
the name might suggest something else. So add two helper functions
to remove confusions with no functional changes.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt.c            |  2 +-
 drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c         | 43 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_reset.h         |  3 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c      |  2 +-
 drivers/gpu/drm/i915/i915_driver.c            |  2 +-
 8 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8c44af1c3451..5c8e9ee3b008 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -678,7 +678,7 @@ void intel_engines_release(struct intel_gt *gt)
 	 */
 	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
-		__intel_gt_reset(gt, ALL_ENGINES);
+		intel_gt_reset_all_engines(gt);
 
 	/* Decouple the backend; but keep the layout for late GPU resets */
 	for_each_engine(engine, gt, id) {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 355aab5b38ba..21829439e686 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2898,7 +2898,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
 		drm_err(&engine->i915->drm,
 			"engine '%s' resumed still in error: %08x\n",
 			engine->name, status);
-		__intel_gt_reset(engine->gt, engine->mask);
+		intel_gt_reset_engine(engine);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 580b5141ce1e..626b166e67ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -832,7 +832,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
 
 	/* Scrub all HW state upon release */
 	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-		__intel_gt_reset(gt, ALL_ENGINES);
+		intel_gt_reset_all_engines(gt);
 }
 
 void intel_gt_driver_release(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 220ac4f92edf..c08fdb65cc69 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -159,7 +159,7 @@ static bool reset_engines(struct intel_gt *gt)
 	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
 		return false;
 
-	return __intel_gt_reset(gt, ALL_ENGINES) == 0;
+	return intel_gt_reset_all_engines(gt) == 0;
 }
 
 static void gt_sanitize(struct intel_gt *gt, bool force)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c8e9aa41fdea..b825daace58e 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -764,7 +764,7 @@ wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 			 HECI_H_GS1_ER_PREP, 0);
 }
 
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
+static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 {
 	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
 	reset_func reset;
@@ -795,6 +795,34 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
 	return ret;
 }
 
+/**
+ * intel_gt_reset_all_engines() - Reset all engines in the given gt.
+ * @gt: the GT to reset all engines for.
+ *
+ * This function resets all engines within the given gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_all_engines(struct intel_gt *gt)
+{
+	return __intel_gt_reset(gt, ALL_ENGINES);
+}
+
+/**
+ * intel_gt_reset_engine() - Reset a specific engine within a gt.
+ * @engine: engine to be reset.
+ *
+ * This function resets the specified engine within a gt.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int intel_gt_reset_engine(struct intel_engine_cs *engine)
+{
+	return __intel_gt_reset(engine->gt, engine->mask);
+}
+
 bool intel_has_gpu_reset(const struct intel_gt *gt)
 {
 	if (!gt->i915->params.reset)
@@ -978,7 +1006,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 
 	/* Even if the GPU reset fails, it should still stop the engines */
 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
-		__intel_gt_reset(gt, ALL_ENGINES);
+		intel_gt_reset_all_engines(gt);
 
 	for_each_engine(engine, gt, id)
 		engine->submit_request = nop_submit_request;
@@ -1089,7 +1117,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	/* We must reset pending GPU events before restoring our submission */
 	ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
-		ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+		ok = intel_gt_reset_all_engines(gt) == 0;
 	if (!ok) {
 		/*
 		 * Warn CI about the unrecoverable wedged condition.
@@ -1133,10 +1161,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
 {
 	int err, i;
 
-	err = __intel_gt_reset(gt, ALL_ENGINES);
+	err = intel_gt_reset_all_engines(gt);
 	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
 		msleep(10 * (i + 1));
-		err = __intel_gt_reset(gt, ALL_ENGINES);
+		err = intel_gt_reset_all_engines(gt);
 	}
 	if (err)
 		return err;
@@ -1270,11 +1298,6 @@ void intel_gt_reset(struct intel_gt *gt,
 	goto finish;
 }
 
-static int intel_gt_reset_engine(struct intel_engine_cs *engine)
-{
-	return __intel_gt_reset(engine->gt, engine->mask);
-}
-
 int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 {
 	struct intel_gt *gt = engine->gt;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index f615b30b81c5..c00de353075c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -54,7 +54,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
 void intel_gt_set_wedged_on_init(struct intel_gt *gt);
 void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
 
-int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
+int intel_gt_reset_engine(struct intel_engine_cs *engine);
+int intel_gt_reset_all_engines(struct intel_gt *gt);
 
 int intel_reset_guc(struct intel_gt *gt);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index f40de408cd3a..2cfc23c58e90 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -281,7 +281,7 @@ static int igt_atomic_reset(void *arg)
 		awake = reset_prepare(gt);
 		p->critical_section_begin();
 
-		err = __intel_gt_reset(gt, ALL_ENGINES);
+		err = intel_gt_reset_all_engines(gt);
 
 		p->critical_section_end();
 		reset_finish(gt, awake);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 4b9233c07a22..622a24305bc2 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -202,7 +202,7 @@ static void sanitize_gpu(struct drm_i915_private *i915)
 		unsigned int i;
 
 		for_each_gt(gt, i915, i)
-			__intel_gt_reset(gt, ALL_ENGINES);
+			intel_gt_reset_all_engines(gt);
 	}
 }
 
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
@ 2024-04-18 17:10 ` Nirmoy Das
  2024-04-18 23:27   ` John Harrison
  2024-04-18 17:10 ` [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled Nirmoy Das
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 13+ messages in thread
From: Nirmoy Das @ 2024-04-18 17:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, John.C.Harrison, Nirmoy Das

intel_engine_reset() not only reset a engine but also
tries to recover it so give it a proper name without
any functional changes.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
 .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c         |  4 ++--
 drivers/gpu/drm/i915/gt/intel_reset.h         |  4 ++--
 drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 20 +++++++++----------
 drivers/gpu/drm/i915/gt/selftest_mocs.c       |  4 ++--
 drivers/gpu/drm/i915/gt/selftest_reset.c      |  2 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c    |  6 +++---
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 89d4dc8b60c6..4f4cde55f621 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1171,7 +1171,7 @@ __sseu_finish(const char *name,
 	int ret = 0;
 
 	if (flags & TEST_RESET) {
-		ret = intel_engine_reset(ce->engine, "sseu");
+		ret = intel_gt_engine_recover(ce->engine, "sseu");
 		if (ret)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 21829439e686..9485a622a704 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2404,7 +2404,7 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
 
 	ring_set_paused(engine, 1); /* Freeze the current request in place */
 	execlists_capture(engine);
-	intel_engine_reset(engine, msg);
+	intel_gt_engine_recover(engine, msg);
 
 	tasklet_enable(&engine->sched_engine->tasklet);
 	clear_and_wake_up_bit(bit, lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b825daace58e..6504e8ba9c58 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1348,7 +1348,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 }
 
 /**
- * intel_engine_reset - reset GPU engine to recover from a hang
+ * intel_gt_engine_recover - reset GPU engine to recover from a hang
  * @engine: engine to reset
  * @msg: reason for GPU reset; or NULL for no drm_notice()
  *
@@ -1360,7 +1360,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
  *  - reset engine (which will force the engine to idle)
  *  - re-init/configure engine
  */
-int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
+int intel_gt_engine_recover(struct intel_engine_cs *engine, const char *msg)
 {
 	int err;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index c00de353075c..be984357bf27 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -31,8 +31,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
 void intel_gt_reset(struct intel_gt *gt,
 		    intel_engine_mask_t stalled_mask,
 		    const char *reason);
-int intel_engine_reset(struct intel_engine_cs *engine,
-		       const char *reason);
+int intel_gt_engine_recover(struct intel_engine_cs *engine,
+			    const char *reason);
 int __intel_engine_reset_bh(struct intel_engine_cs *engine,
 			    const char *reason);
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 9ce8ff1c04fe..9bfda3f2bd24 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -495,9 +495,9 @@ static int igt_reset_nop_engine(void *arg)
 
 				i915_request_add(rq);
 			}
-			err = intel_engine_reset(engine, NULL);
+			err = intel_gt_engine_recover(engine, NULL);
 			if (err) {
-				pr_err("intel_engine_reset(%s) failed, err:%d\n",
+				pr_err("intel_gt_engine_recover(%s) failed, err:%d\n",
 				       engine->name, err);
 				break;
 			}
@@ -574,7 +574,7 @@ static int igt_reset_fail_engine(void *arg)
 					    &gt->reset.flags));
 
 		force_reset_timeout(engine);
-		err = intel_engine_reset(engine, NULL);
+		err = intel_gt_engine_recover(engine, NULL);
 		cancel_reset_timeout(engine);
 		if (err == 0) /* timeouts only generated on gen8+ */
 			goto skip;
@@ -623,9 +623,9 @@ static int igt_reset_fail_engine(void *arg)
 			}
 
 			if (count & 1) {
-				err = intel_engine_reset(engine, NULL);
+				err = intel_gt_engine_recover(engine, NULL);
 				if (err) {
-					GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n",
+					GEM_TRACE_ERR("intel_gt_engine_recover(%s) failed, err:%d\n",
 						      engine->name, err);
 					GEM_TRACE_DUMP();
 					i915_request_put(last);
@@ -633,10 +633,10 @@ static int igt_reset_fail_engine(void *arg)
 				}
 			} else {
 				force_reset_timeout(engine);
-				err = intel_engine_reset(engine, NULL);
+				err = intel_gt_engine_recover(engine, NULL);
 				cancel_reset_timeout(engine);
 				if (err != -ETIMEDOUT) {
-					pr_err("intel_engine_reset(%s) did not fail, err:%d\n",
+					pr_err("intel_gt_engine_recover(%s) did not fail, err:%d\n",
 					       engine->name, err);
 					i915_request_put(last);
 					break;
@@ -765,9 +765,9 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
 			}
 
 			if (!using_guc) {
-				err = intel_engine_reset(engine, NULL);
+				err = intel_gt_engine_recover(engine, NULL);
 				if (err) {
-					pr_err("intel_engine_reset(%s) failed, err:%d\n",
+					pr_err("intel_gt_engine_recover(%s) failed, err:%d\n",
 					       engine->name, err);
 					goto skip;
 				}
@@ -1085,7 +1085,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			}
 
 			if (!using_guc) {
-				err = intel_engine_reset(engine, NULL);
+				err = intel_gt_engine_recover(engine, NULL);
 				if (err) {
 					pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
 					       engine->name, test_name, err);
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index d73e438fb85f..b7b15dd3163f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -336,7 +336,7 @@ static int active_engine_reset(struct intel_context *ce,
 
 	err = request_add_spin(rq, &spin);
 	if (err == 0 && !using_guc)
-		err = intel_engine_reset(ce->engine, reason);
+		err = intel_gt_engine_recover(ce->engine, reason);
 
 	/* Ensure the reset happens and kills the engine */
 	if (err == 0)
@@ -356,7 +356,7 @@ static int __live_mocs_reset(struct live_mocs *mocs,
 
 	if (intel_has_reset_engine(gt)) {
 		if (!using_guc) {
-			err = intel_engine_reset(ce->engine, "mocs");
+			err = intel_gt_engine_recover(ce->engine, "mocs");
 			if (err)
 				return err;
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 2cfc23c58e90..9eaa1aed9f58 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -115,7 +115,7 @@ __igt_reset_stolen(struct intel_gt *gt,
 	} else {
 		for_each_engine(engine, gt, id) {
 			if (mask & engine->mask)
-				intel_engine_reset(engine, NULL);
+				intel_gt_engine_recover(engine, NULL);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 14a8b25b6204..eb7516c7cb56 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -256,7 +256,7 @@ static int do_device_reset(struct intel_engine_cs *engine)
 
 static int do_engine_reset(struct intel_engine_cs *engine)
 {
-	return intel_engine_reset(engine, "live_workarounds");
+	return intel_gt_engine_recover(engine, "live_workarounds");
 }
 
 static int do_guc_reset(struct intel_engine_cs *engine)
@@ -1282,7 +1282,7 @@ live_engine_reset_workarounds(void *arg)
 				goto err;
 			}
 
-			ret = intel_engine_reset(engine, "live_workarounds:idle");
+			ret = intel_gt_engine_recover(engine, "live_workarounds:idle");
 			if (ret) {
 				pr_err("%s: Reset failed while idle\n", engine->name);
 				goto err;
@@ -1320,7 +1320,7 @@ live_engine_reset_workarounds(void *arg)
 		}
 
 		if (!using_guc) {
-			ret = intel_engine_reset(engine, "live_workarounds:active");
+			ret = intel_gt_engine_recover(engine, "live_workarounds:active");
 			if (ret) {
 				pr_err("%s: Reset failed on an active spinner\n",
 				       engine->name);
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
  2024-04-18 17:10 ` [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover Nirmoy Das
@ 2024-04-18 17:10 ` Nirmoy Das
  2024-04-18 23:38   ` John Harrison
  2024-04-18 20:09 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() Patchwork
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 13+ messages in thread
From: Nirmoy Das @ 2024-04-18 17:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: dri-devel, John.C.Harrison, Nirmoy Das

Currently intel_gt_reset() happens as follows:

reset_prepare() ---> Sends GDRST to GuC, GuC is in GS_MIA_IN_RESET
do_reset()
  intel_gt_reset_all_engines()
    *_engine_reset_prepare() -->RESET_CTL expects running GuC
    *_reset_engines()
intel_gt_init_hw() --> GuC comes out of GS_MIA_IN_RESET with FW loaded.

Fix the issue by sanitizing the GuC only after resetting requested
engines and before intel_gt_init_hw().

Note intel_uc_reset_finish() and intel_uc_reset() are nop when
guc submission is disabled.

Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_reset.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 6504e8ba9c58..bd166f5aca4b 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -907,8 +907,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
 	intel_engine_mask_t awake = 0;
 	enum intel_engine_id id;
 
-	/* For GuC mode, ensure submission is disabled before stopping ring */
-	intel_uc_reset_prepare(&gt->uc);
+	/**
+	 * For GuC mode with submission enabled, ensure submission
+	 * is disabled before stopping ring.
+	 *
+	 * For GuC mode with submission disabled, ensure that GuC is not
+	 * sanitized, do that at the end in reset_finish(). reset_prepare()
+	 * is followed by engine reset which in this mode requires GuC to
+	 * be functional to process engine reset events.
+	 */
+	if (intel_uc_uses_guc_submission(&gt->uc))
+		intel_uc_reset_prepare(&gt->uc);
 
 	for_each_engine(engine, gt, id) {
 		if (intel_engine_pm_get_if_awake(engine))
@@ -1255,6 +1264,9 @@ void intel_gt_reset(struct intel_gt *gt,
 
 	intel_overlay_reset(gt->i915);
 
+	/* sanitize uC after engine reset */
+	if (!intel_uc_uses_guc_submission(&gt->uc))
+		intel_uc_reset_prepare(&gt->uc);
 	/*
 	 * Next we need to restore the context, but we don't use those
 	 * yet either...
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset()
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
  2024-04-18 17:10 ` [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover Nirmoy Das
  2024-04-18 17:10 ` [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled Nirmoy Das
@ 2024-04-18 20:09 ` Patchwork
  2024-04-18 20:16 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2024-04-18 20:09 UTC (permalink / raw)
  To: Nirmoy Das; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset()
URL   : https://patchwork.freedesktop.org/series/132616/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.



^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset()
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
                   ` (2 preceding siblings ...)
  2024-04-18 20:09 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() Patchwork
@ 2024-04-18 20:16 ` Patchwork
  2024-04-18 21:46 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2) Patchwork
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2024-04-18 20:16 UTC (permalink / raw)
  To: Nirmoy Das; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 7791 bytes --]

== Series Details ==

Series: series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset()
URL   : https://patchwork.freedesktop.org/series/132616/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_14606 -> Patchwork_132616v1
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_132616v1 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_132616v1, please notify your bug team (&quot;I915-ci-infra@lists.freedesktop.org&quot;) to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/index.html

Participating hosts (39 -> 36)
------------------------------

  Additional (2): bat-adlm-1 fi-kbl-8809g 
  Missing    (5): fi-kbl-7567u bat-dg1-7 fi-glk-j4005 fi-elk-e7500 bat-dg2-11 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_132616v1:

### IGT changes ###

#### Possible regressions ####

  * igt@debugfs_test@read_all_entries:
    - bat-atsm-1:         NOTRUN -> [INCOMPLETE][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-atsm-1/igt@debugfs_test@read_all_entries.html

  * igt@kms_busy@basic@modeset:
    - fi-ilk-650:         [PASS][2] -> [INCOMPLETE][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/fi-ilk-650/igt@kms_busy@basic@modeset.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/fi-ilk-650/igt@kms_busy@basic@modeset.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_busy@basic@flip:
    - {bat-mtlp-9}:       [PASS][4] -> [ABORT][5]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-mtlp-9/igt@kms_busy@basic@flip.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-mtlp-9/igt@kms_busy@basic@flip.html

  * igt@kms_busy@basic@modeset:
    - {bat-mtlp-9}:       [PASS][6] -> [DMESG-WARN][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-mtlp-9/igt@kms_busy@basic@modeset.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-mtlp-9/igt@kms_busy@basic@modeset.html

  
Known issues
------------

  Here are the changes found in Patchwork_132616v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@basic-hwmon:
    - fi-kbl-8809g:       NOTRUN -> [SKIP][8]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/fi-kbl-8809g/igt@debugfs_test@basic-hwmon.html
    - bat-adlm-1:         NOTRUN -> [SKIP][9] ([i915#3826])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-adlm-1/igt@debugfs_test@basic-hwmon.html

  * igt@fbdev@eof:
    - bat-adlm-1:         NOTRUN -> [SKIP][10] ([i915#2582]) +3 other tests skip
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-adlm-1/igt@fbdev@eof.html

  * igt@fbdev@info:
    - bat-adlm-1:         NOTRUN -> [SKIP][11] ([i915#1849] / [i915#2582])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-adlm-1/igt@fbdev@info.html

  * igt@gem_close_race@basic-threads:
    - fi-kbl-8809g:       NOTRUN -> [ABORT][12] ([i915#10875])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/fi-kbl-8809g/igt@gem_close_race@basic-threads.html
    - bat-adlm-1:         NOTRUN -> [DMESG-WARN][13] ([i915#10875])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-adlm-1/igt@gem_close_race@basic-threads.html

  * igt@i915_module_load@load:
    - bat-dg2-13:         [PASS][14] -> [INCOMPLETE][15] ([i915#10875])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-dg2-13/igt@i915_module_load@load.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-dg2-13/igt@i915_module_load@load.html
    - bat-dg2-8:          [PASS][16] -> [INCOMPLETE][17] ([i915#10877])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-dg2-8/igt@i915_module_load@load.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-dg2-8/igt@i915_module_load@load.html

  * igt@kms_busy@basic@modeset:
    - fi-cfl-guc:         [PASS][18] -> [INCOMPLETE][19] ([i915#10056])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/fi-cfl-guc/igt@kms_busy@basic@modeset.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/fi-cfl-guc/igt@kms_busy@basic@modeset.html
    - bat-mtlp-8:         [PASS][20] -> [DMESG-WARN][21] ([i915#10875])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-mtlp-8/igt@kms_busy@basic@modeset.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-mtlp-8/igt@kms_busy@basic@modeset.html
    - bat-adls-6:         [PASS][22] -> [ABORT][23] ([i915#10875])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-adls-6/igt@kms_busy@basic@modeset.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-adls-6/igt@kms_busy@basic@modeset.html

  * igt@kms_flip@basic-flip-vs-dpms@b-edp1:
    - bat-jsl-1:          [PASS][24] -> [INCOMPLETE][25] ([i915#10876])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-jsl-1/igt@kms_flip@basic-flip-vs-dpms@b-edp1.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-jsl-1/igt@kms_flip@basic-flip-vs-dpms@b-edp1.html

  
#### Possible fixes ####

  * igt@gem_close_race@basic-process:
    - bat-dg2-9:          [INCOMPLETE][26] ([i915#10875]) -> [PASS][27]
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-dg2-9/igt@gem_close_race@basic-process.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-dg2-9/igt@gem_close_race@basic-process.html

  * igt@i915_module_load@load:
    - bat-atsm-1:         [INCOMPLETE][28] ([i915#10875]) -> [PASS][29]
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/bat-atsm-1/igt@i915_module_load@load.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/bat-atsm-1/igt@i915_module_load@load.html

  * igt@kms_busy@basic@modeset:
    - fi-bsw-n3050:       [DMESG-WARN][30] ([i915#10875]) -> [PASS][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14606/fi-bsw-n3050/igt@kms_busy@basic@modeset.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/fi-bsw-n3050/igt@kms_busy@basic@modeset.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#10056]: https://gitlab.freedesktop.org/drm/intel/issues/10056
  [i915#10875]: https://gitlab.freedesktop.org/drm/intel/issues/10875
  [i915#10876]: https://gitlab.freedesktop.org/drm/intel/issues/10876
  [i915#10877]: https://gitlab.freedesktop.org/drm/intel/issues/10877
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#3826]: https://gitlab.freedesktop.org/drm/intel/issues/3826


Build changes
-------------

  * Linux: CI_DRM_14606 -> Patchwork_132616v1

  CI-20190529: 20190529
  CI_DRM_14606: 121bb3cb2e7ce2d6f5c35cea84c6ae308cf1da0a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7813: 66841b7d9024447be4f4f5449aaf4f021e6323e5 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_132616v1: 121bb3cb2e7ce2d6f5c35cea84c6ae308cf1da0a @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v1/index.html

[-- Attachment #2: Type: text/html, Size: 9352 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2)
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
                   ` (3 preceding siblings ...)
  2024-04-18 20:16 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2024-04-18 21:46 ` Patchwork
  2024-04-18 21:56 ` ✗ Fi.CI.BAT: failure " Patchwork
  2024-04-18 23:27 ` [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() John Harrison
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2024-04-18 21:46 UTC (permalink / raw)
  To: Nirmoy Das; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2)
URL   : https://patchwork.freedesktop.org/series/132616/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.



^ permalink raw reply	[flat|nested] 13+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2)
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
                   ` (4 preceding siblings ...)
  2024-04-18 21:46 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2) Patchwork
@ 2024-04-18 21:56 ` Patchwork
  2024-04-18 23:27 ` [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() John Harrison
  6 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2024-04-18 21:56 UTC (permalink / raw)
  To: Nirmoy Das; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 12596 bytes --]

== Series Details ==

Series: series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2)
URL   : https://patchwork.freedesktop.org/series/132616/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_14607 -> Patchwork_132616v2
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_132616v2 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_132616v2, please notify your bug team (&quot;I915-ci-infra@lists.freedesktop.org&quot;) to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/index.html

Participating hosts (37 -> 40)
------------------------------

  Additional (4): fi-glk-j4005 bat-kbl-2 bat-jsl-1 fi-elk-e7500 
  Missing    (1): bat-dg2-11 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_132616v2:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_exec_basic@basic@vecs0-smem:
    - fi-rkl-11600:       NOTRUN -> [DMESG-WARN][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-rkl-11600/igt@gem_exec_basic@basic@vecs0-smem.html

  * igt@kms_busy@basic@flip:
    - fi-bsw-nick:        [PASS][2] -> [INCOMPLETE][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-bsw-nick/igt@kms_busy@basic@flip.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-bsw-nick/igt@kms_busy@basic@flip.html

  * igt@kms_busy@basic@modeset:
    - fi-ivb-3770:        [PASS][4] -> [INCOMPLETE][5]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-ivb-3770/igt@kms_busy@basic@modeset.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-ivb-3770/igt@kms_busy@basic@modeset.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-atomic:
    - bat-adln-1:         [PASS][6] -> [DMESG-WARN][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-adln-1/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-adln-1/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html

  * igt@kms_flip@basic-flip-vs-dpms@b-dp1:
    - fi-apl-guc:         NOTRUN -> [INCOMPLETE][8]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-apl-guc/igt@kms_flip@basic-flip-vs-dpms@b-dp1.html

  * igt@kms_flip@basic-flip-vs-dpms@b-hdmi-a1:
    - fi-cfl-guc:         NOTRUN -> [INCOMPLETE][9]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-cfl-guc/igt@kms_flip@basic-flip-vs-dpms@b-hdmi-a1.html

  * igt@kms_flip@basic-flip-vs-dpms@b-hdmi-a2:
    - fi-glk-j4005:       NOTRUN -> [INCOMPLETE][10]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-glk-j4005/igt@kms_flip@basic-flip-vs-dpms@b-hdmi-a2.html

  
#### Warnings ####

  * igt@i915_module_load@load:
    - bat-adlm-1:         [INCOMPLETE][11] ([i915#10875]) -> [INCOMPLETE][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-adlm-1/igt@i915_module_load@load.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-adlm-1/igt@i915_module_load@load.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_cursor_legacy@basic-flip-after-cursor-legacy:
    - {bat-mtlp-9}:       NOTRUN -> [ABORT][13]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-mtlp-9/igt@kms_cursor_legacy@basic-flip-after-cursor-legacy.html

  
Known issues
------------

  Here are the changes found in Patchwork_132616v2 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@basic-hwmon:
    - bat-jsl-1:          NOTRUN -> [SKIP][14] ([i915#9318])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-jsl-1/igt@debugfs_test@basic-hwmon.html

  * igt@gem_huc_copy@huc-copy:
    - fi-elk-e7500:       NOTRUN -> [SKIP][15] +5 other tests skip
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-elk-e7500/igt@gem_huc_copy@huc-copy.html
    - bat-jsl-1:          NOTRUN -> [SKIP][16] ([i915#2190])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-jsl-1/igt@gem_huc_copy@huc-copy.html
    - fi-glk-j4005:       NOTRUN -> [SKIP][17] ([i915#2190])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-glk-j4005/igt@gem_huc_copy@huc-copy.html

  * igt@i915_module_load@load:
    - bat-kbl-2:          NOTRUN -> [INCOMPLETE][18] ([i915#10877])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-kbl-2/igt@i915_module_load@load.html

  * igt@kms_busy@basic@modeset:
    - bat-dg1-7:          [PASS][19] -> [ABORT][20] ([i915#10875])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-dg1-7/igt@kms_busy@basic@modeset.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-dg1-7/igt@kms_busy@basic@modeset.html
    - bat-dg2-14:         [PASS][21] -> [DMESG-WARN][22] ([i915#10875])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-dg2-14/igt@kms_busy@basic@modeset.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-dg2-14/igt@kms_busy@basic@modeset.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-glk-j4005:       NOTRUN -> [SKIP][23] +3 other tests skip
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-glk-j4005/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html
    - bat-adlp-9:         NOTRUN -> [SKIP][24] ([i915#4103]) +1 other test skip
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-adlp-9/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - fi-cfl-guc:         NOTRUN -> [SKIP][25] +2 other tests skip
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-cfl-guc/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
    - bat-jsl-1:          NOTRUN -> [SKIP][26] ([i915#4103]) +1 other test skip
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-jsl-1/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
    - bat-arls-1:         NOTRUN -> [SKIP][27] ([i915#10202]) +1 other test skip
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-arls-1/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
    - fi-apl-guc:         NOTRUN -> [SKIP][28] +2 other tests skip
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-apl-guc/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-atomic:
    - bat-arls-2:         [PASS][29] -> [ABORT][30] ([i915#10875])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-arls-2/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-arls-2/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html
    - bat-arls-1:         NOTRUN -> [ABORT][31] ([i915#10875])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-arls-1/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html

  * igt@kms_dsc@dsc-basic:
    - bat-jsl-1:          NOTRUN -> [SKIP][32] ([i915#3555] / [i915#9886])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-jsl-1/igt@kms_dsc@dsc-basic.html
    - bat-adlp-9:         NOTRUN -> [SKIP][33] ([i915#3555] / [i915#3840])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-adlp-9/igt@kms_dsc@dsc-basic.html

  * igt@kms_flip@basic-flip-vs-dpms@b-edp1:
    - bat-jsl-1:          NOTRUN -> [INCOMPLETE][34] ([i915#10876])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-jsl-1/igt@kms_flip@basic-flip-vs-dpms@b-edp1.html

  * igt@kms_flip@basic-flip-vs-dpms@c-dp1:
    - bat-adlp-9:         NOTRUN -> [INCOMPLETE][35] ([i915#10876])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-adlp-9/igt@kms_flip@basic-flip-vs-dpms@c-dp1.html

  
#### Possible fixes ####

  * igt@kms_busy@basic@flip:
    - {bat-mtlp-9}:       [ABORT][36] -> [PASS][37]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-mtlp-9/igt@kms_busy@basic@flip.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-mtlp-9/igt@kms_busy@basic@flip.html
    - fi-apl-guc:         [INCOMPLETE][38] -> [PASS][39]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-apl-guc/igt@kms_busy@basic@flip.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-apl-guc/igt@kms_busy@basic@flip.html
    - fi-pnv-d510:        [ABORT][40] -> [PASS][41]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-pnv-d510/igt@kms_busy@basic@flip.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-pnv-d510/igt@kms_busy@basic@flip.html
    - {bat-rpls-4}:       [ABORT][42] -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-rpls-4/igt@kms_busy@basic@flip.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-rpls-4/igt@kms_busy@basic@flip.html

  * igt@kms_busy@basic@modeset:
    - fi-cfl-guc:         [INCOMPLETE][44] ([i915#10056]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-cfl-guc/igt@kms_busy@basic@modeset.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-cfl-guc/igt@kms_busy@basic@modeset.html
    - {bat-mtlp-9}:       [DMESG-WARN][46] -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-mtlp-9/igt@kms_busy@basic@modeset.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-mtlp-9/igt@kms_busy@basic@modeset.html
    - bat-arls-1:         [ABORT][48] ([i915#10875]) -> [PASS][49]
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/bat-arls-1/igt@kms_busy@basic@modeset.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/bat-arls-1/igt@kms_busy@basic@modeset.html

  * igt@kms_flip@basic-flip-vs-dpms@b-dp1:
    - fi-cfl-8109u:       [INCOMPLETE][50] -> [PASS][51]
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-cfl-8109u/igt@kms_flip@basic-flip-vs-dpms@b-dp1.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-cfl-8109u/igt@kms_flip@basic-flip-vs-dpms@b-dp1.html

  
#### Warnings ####

  * igt@kms_busy@basic@modeset:
    - fi-pnv-d510:        [SKIP][52] -> [ABORT][53] ([i915#10875])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14607/fi-pnv-d510/igt@kms_busy@basic@modeset.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/fi-pnv-d510/igt@kms_busy@basic@modeset.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#10056]: https://gitlab.freedesktop.org/drm/intel/issues/10056
  [i915#10202]: https://gitlab.freedesktop.org/drm/intel/issues/10202
  [i915#10875]: https://gitlab.freedesktop.org/drm/intel/issues/10875
  [i915#10876]: https://gitlab.freedesktop.org/drm/intel/issues/10876
  [i915#10877]: https://gitlab.freedesktop.org/drm/intel/issues/10877
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3840]: https://gitlab.freedesktop.org/drm/intel/issues/3840
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4213]: https://gitlab.freedesktop.org/drm/intel/issues/4213
  [i915#9318]: https://gitlab.freedesktop.org/drm/intel/issues/9318
  [i915#9886]: https://gitlab.freedesktop.org/drm/intel/issues/9886


Build changes
-------------

  * Linux: CI_DRM_14607 -> Patchwork_132616v2

  CI-20190529: 20190529
  CI_DRM_14607: cabe88f47c1f688f4493de88acc532bf584efe3c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7813: 66841b7d9024447be4f4f5449aaf4f021e6323e5 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_132616v2: cabe88f47c1f688f4493de88acc532bf584efe3c @ git://anongit.freedesktop.org/gfx-ci/linux

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132616v2/index.html

[-- Attachment #2: Type: text/html, Size: 14493 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset()
  2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
                   ` (5 preceding siblings ...)
  2024-04-18 21:56 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2024-04-18 23:27 ` John Harrison
  2024-04-19  8:44   ` Nirmoy Das
  6 siblings, 1 reply; 13+ messages in thread
From: John Harrison @ 2024-04-18 23:27 UTC (permalink / raw)
  To: Nirmoy Das, intel-gfx; +Cc: dri-devel

On 4/18/2024 10:10, Nirmoy Das wrote:
> __intel_gt_reset() is really for resetting engines though
> the name might suggest something else. So add two helper functions
> to remove confusions with no functional changes.
Technically you only added one and just moved the other :). It already 
existed, it just wasn't being used everywhere that it could be!

>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +-
>   .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
>   drivers/gpu/drm/i915/gt/intel_gt.c            |  2 +-
>   drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  2 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c         | 43 ++++++++++++++-----
>   drivers/gpu/drm/i915/gt/intel_reset.h         |  3 +-
>   drivers/gpu/drm/i915/gt/selftest_reset.c      |  2 +-
>   drivers/gpu/drm/i915/i915_driver.c            |  2 +-
>   8 files changed, 41 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 8c44af1c3451..5c8e9ee3b008 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -678,7 +678,7 @@ void intel_engines_release(struct intel_gt *gt)
>   	 */
>   	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
>   	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
> -		__intel_gt_reset(gt, ALL_ENGINES);
> +		intel_gt_reset_all_engines(gt);
>   
>   	/* Decouple the backend; but keep the layout for late GPU resets */
>   	for_each_engine(engine, gt, id) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 355aab5b38ba..21829439e686 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2898,7 +2898,7 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
>   		drm_err(&engine->i915->drm,
>   			"engine '%s' resumed still in error: %08x\n",
>   			engine->name, status);
> -		__intel_gt_reset(engine->gt, engine->mask);
> +		intel_gt_reset_engine(engine);
>   	}
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 580b5141ce1e..626b166e67ef 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -832,7 +832,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
>   
>   	/* Scrub all HW state upon release */
>   	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> -		__intel_gt_reset(gt, ALL_ENGINES);
> +		intel_gt_reset_all_engines(gt);
>   }
>   
>   void intel_gt_driver_release(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 220ac4f92edf..c08fdb65cc69 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -159,7 +159,7 @@ static bool reset_engines(struct intel_gt *gt)
>   	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
>   		return false;
>   
> -	return __intel_gt_reset(gt, ALL_ENGINES) == 0;
> +	return intel_gt_reset_all_engines(gt) == 0;
>   }
>   
>   static void gt_sanitize(struct intel_gt *gt, bool force)
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index c8e9aa41fdea..b825daace58e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -764,7 +764,7 @@ wa_14015076503_end(struct intel_gt *gt, intel_engine_mask_t engine_mask)
>   			 HECI_H_GS1_ER_PREP, 0);
>   }
>   
> -int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
> +static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
>   {
>   	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
>   	reset_func reset;
> @@ -795,6 +795,34 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
>   	return ret;
>   }
>   
> +/**
> + * intel_gt_reset_all_engines() - Reset all engines in the given gt.
> + * @gt: the GT to reset all engines for.
> + *
> + * This function resets all engines within the given gt.
> + *
> + * Returns:
> + * Zero on success, negative error code on failure.
> + */
> +int intel_gt_reset_all_engines(struct intel_gt *gt)
> +{
> +	return __intel_gt_reset(gt, ALL_ENGINES);
> +}
> +
> +/**
> + * intel_gt_reset_engine() - Reset a specific engine within a gt.
> + * @engine: engine to be reset.
> + *
> + * This function resets the specified engine within a gt.
> + *
> + * Returns:
> + * Zero on success, negative error code on failure.
> + */
> +int intel_gt_reset_engine(struct intel_engine_cs *engine)
> +{
> +	return __intel_gt_reset(engine->gt, engine->mask);
> +}
> +
You could have just dropped the 'static' from the existing copy of this 
function and added the new version next to it. That would make the diff 
simpler and therefore clearer. Unless you think there is a good reason 
to move the code up here?

John.

>   bool intel_has_gpu_reset(const struct intel_gt *gt)
>   {
>   	if (!gt->i915->params.reset)
> @@ -978,7 +1006,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
>   
>   	/* Even if the GPU reset fails, it should still stop the engines */
>   	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
> -		__intel_gt_reset(gt, ALL_ENGINES);
> +		intel_gt_reset_all_engines(gt);
>   
>   	for_each_engine(engine, gt, id)
>   		engine->submit_request = nop_submit_request;
> @@ -1089,7 +1117,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
>   	/* We must reset pending GPU events before restoring our submission */
>   	ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
>   	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
> -		ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
> +		ok = intel_gt_reset_all_engines(gt) == 0;
>   	if (!ok) {
>   		/*
>   		 * Warn CI about the unrecoverable wedged condition.
> @@ -1133,10 +1161,10 @@ static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
>   {
>   	int err, i;
>   
> -	err = __intel_gt_reset(gt, ALL_ENGINES);
> +	err = intel_gt_reset_all_engines(gt);
>   	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
>   		msleep(10 * (i + 1));
> -		err = __intel_gt_reset(gt, ALL_ENGINES);
> +		err = intel_gt_reset_all_engines(gt);
>   	}
>   	if (err)
>   		return err;
> @@ -1270,11 +1298,6 @@ void intel_gt_reset(struct intel_gt *gt,
>   	goto finish;
>   }
>   
> -static int intel_gt_reset_engine(struct intel_engine_cs *engine)
> -{
> -	return __intel_gt_reset(engine->gt, engine->mask);
> -}
> -
>   int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
>   {
>   	struct intel_gt *gt = engine->gt;
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> index f615b30b81c5..c00de353075c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.h
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> @@ -54,7 +54,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
>   void intel_gt_set_wedged_on_init(struct intel_gt *gt);
>   void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
>   
> -int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
> +int intel_gt_reset_engine(struct intel_engine_cs *engine);
> +int intel_gt_reset_all_engines(struct intel_gt *gt);
>   
>   int intel_reset_guc(struct intel_gt *gt);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
> index f40de408cd3a..2cfc23c58e90 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
> @@ -281,7 +281,7 @@ static int igt_atomic_reset(void *arg)
>   		awake = reset_prepare(gt);
>   		p->critical_section_begin();
>   
> -		err = __intel_gt_reset(gt, ALL_ENGINES);
> +		err = intel_gt_reset_all_engines(gt);
>   
>   		p->critical_section_end();
>   		reset_finish(gt, awake);
> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 4b9233c07a22..622a24305bc2 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -202,7 +202,7 @@ static void sanitize_gpu(struct drm_i915_private *i915)
>   		unsigned int i;
>   
>   		for_each_gt(gt, i915, i)
> -			__intel_gt_reset(gt, ALL_ENGINES);
> +			intel_gt_reset_all_engines(gt);
>   	}
>   }
>   


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover
  2024-04-18 17:10 ` [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover Nirmoy Das
@ 2024-04-18 23:27   ` John Harrison
  2024-04-19  8:48     ` Nirmoy Das
  0 siblings, 1 reply; 13+ messages in thread
From: John Harrison @ 2024-04-18 23:27 UTC (permalink / raw)
  To: Nirmoy Das, intel-gfx; +Cc: dri-devel

On 4/18/2024 10:10, Nirmoy Das wrote:
> intel_engine_reset() not only reset a engine but also
> tries to recover it so give it a proper name without
> any functional changes.
Not seeing what the difference is. If this was a super low level 
function (with an __ prefix for example) then one might expect it to 
literally just poke the reset register and leave the engine in a dead 
state. But as a high level function, I think it is reasonable to expect 
a reset function to 'recover' the entity being reset.

Also, many of the callers are tests that are explicitly testing reset. 
So now the tests all talk about attempting resets, resets failing, etc. 
but around a call to 'recover' instead of 'reset', which seems confusing.

John.

>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
>   .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
>   .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
>   drivers/gpu/drm/i915/gt/intel_reset.c         |  4 ++--
>   drivers/gpu/drm/i915/gt/intel_reset.h         |  4 ++--
>   drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 20 +++++++++----------
>   drivers/gpu/drm/i915/gt/selftest_mocs.c       |  4 ++--
>   drivers/gpu/drm/i915/gt/selftest_reset.c      |  2 +-
>   .../gpu/drm/i915/gt/selftest_workarounds.c    |  6 +++---
>   8 files changed, 22 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index 89d4dc8b60c6..4f4cde55f621 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -1171,7 +1171,7 @@ __sseu_finish(const char *name,
>   	int ret = 0;
>   
>   	if (flags & TEST_RESET) {
> -		ret = intel_engine_reset(ce->engine, "sseu");
> +		ret = intel_gt_engine_recover(ce->engine, "sseu");
>   		if (ret)
>   			goto out;
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index 21829439e686..9485a622a704 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2404,7 +2404,7 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
>   
>   	ring_set_paused(engine, 1); /* Freeze the current request in place */
>   	execlists_capture(engine);
> -	intel_engine_reset(engine, msg);
> +	intel_gt_engine_recover(engine, msg);
>   
>   	tasklet_enable(&engine->sched_engine->tasklet);
>   	clear_and_wake_up_bit(bit, lock);
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index b825daace58e..6504e8ba9c58 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -1348,7 +1348,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
>   }
>   
>   /**
> - * intel_engine_reset - reset GPU engine to recover from a hang
> + * intel_gt_engine_recover - reset GPU engine to recover from a hang
>    * @engine: engine to reset
>    * @msg: reason for GPU reset; or NULL for no drm_notice()
>    *
> @@ -1360,7 +1360,7 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
>    *  - reset engine (which will force the engine to idle)
>    *  - re-init/configure engine
>    */
> -int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
> +int intel_gt_engine_recover(struct intel_engine_cs *engine, const char *msg)
>   {
>   	int err;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
> index c00de353075c..be984357bf27 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.h
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
> @@ -31,8 +31,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
>   void intel_gt_reset(struct intel_gt *gt,
>   		    intel_engine_mask_t stalled_mask,
>   		    const char *reason);
> -int intel_engine_reset(struct intel_engine_cs *engine,
> -		       const char *reason);
> +int intel_gt_engine_recover(struct intel_engine_cs *engine,
> +			    const char *reason);
>   int __intel_engine_reset_bh(struct intel_engine_cs *engine,
>   			    const char *reason);
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 9ce8ff1c04fe..9bfda3f2bd24 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -495,9 +495,9 @@ static int igt_reset_nop_engine(void *arg)
>   
>   				i915_request_add(rq);
>   			}
> -			err = intel_engine_reset(engine, NULL);
> +			err = intel_gt_engine_recover(engine, NULL);
>   			if (err) {
> -				pr_err("intel_engine_reset(%s) failed, err:%d\n",
> +				pr_err("intel_gt_engine_recover(%s) failed, err:%d\n",
>   				       engine->name, err);
>   				break;
>   			}
> @@ -574,7 +574,7 @@ static int igt_reset_fail_engine(void *arg)
>   					    &gt->reset.flags));
>   
>   		force_reset_timeout(engine);
> -		err = intel_engine_reset(engine, NULL);
> +		err = intel_gt_engine_recover(engine, NULL);
>   		cancel_reset_timeout(engine);
>   		if (err == 0) /* timeouts only generated on gen8+ */
>   			goto skip;
> @@ -623,9 +623,9 @@ static int igt_reset_fail_engine(void *arg)
>   			}
>   
>   			if (count & 1) {
> -				err = intel_engine_reset(engine, NULL);
> +				err = intel_gt_engine_recover(engine, NULL);
>   				if (err) {
> -					GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n",
> +					GEM_TRACE_ERR("intel_gt_engine_recover(%s) failed, err:%d\n",
>   						      engine->name, err);
>   					GEM_TRACE_DUMP();
>   					i915_request_put(last);
> @@ -633,10 +633,10 @@ static int igt_reset_fail_engine(void *arg)
>   				}
>   			} else {
>   				force_reset_timeout(engine);
> -				err = intel_engine_reset(engine, NULL);
> +				err = intel_gt_engine_recover(engine, NULL);
>   				cancel_reset_timeout(engine);
>   				if (err != -ETIMEDOUT) {
> -					pr_err("intel_engine_reset(%s) did not fail, err:%d\n",
> +					pr_err("intel_gt_engine_recover(%s) did not fail, err:%d\n",
>   					       engine->name, err);
>   					i915_request_put(last);
>   					break;
> @@ -765,9 +765,9 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
>   			}
>   
>   			if (!using_guc) {
> -				err = intel_engine_reset(engine, NULL);
> +				err = intel_gt_engine_recover(engine, NULL);
>   				if (err) {
> -					pr_err("intel_engine_reset(%s) failed, err:%d\n",
> +					pr_err("intel_gt_engine_recover(%s) failed, err:%d\n",
>   					       engine->name, err);
>   					goto skip;
>   				}
> @@ -1085,7 +1085,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
>   			}
>   
>   			if (!using_guc) {
> -				err = intel_engine_reset(engine, NULL);
> +				err = intel_gt_engine_recover(engine, NULL);
>   				if (err) {
>   					pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
>   					       engine->name, test_name, err);
> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> index d73e438fb85f..b7b15dd3163f 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> @@ -336,7 +336,7 @@ static int active_engine_reset(struct intel_context *ce,
>   
>   	err = request_add_spin(rq, &spin);
>   	if (err == 0 && !using_guc)
> -		err = intel_engine_reset(ce->engine, reason);
> +		err = intel_gt_engine_recover(ce->engine, reason);
>   
>   	/* Ensure the reset happens and kills the engine */
>   	if (err == 0)
> @@ -356,7 +356,7 @@ static int __live_mocs_reset(struct live_mocs *mocs,
>   
>   	if (intel_has_reset_engine(gt)) {
>   		if (!using_guc) {
> -			err = intel_engine_reset(ce->engine, "mocs");
> +			err = intel_gt_engine_recover(ce->engine, "mocs");
>   			if (err)
>   				return err;
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
> index 2cfc23c58e90..9eaa1aed9f58 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
> @@ -115,7 +115,7 @@ __igt_reset_stolen(struct intel_gt *gt,
>   	} else {
>   		for_each_engine(engine, gt, id) {
>   			if (mask & engine->mask)
> -				intel_engine_reset(engine, NULL);
> +				intel_gt_engine_recover(engine, NULL);
>   		}
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 14a8b25b6204..eb7516c7cb56 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -256,7 +256,7 @@ static int do_device_reset(struct intel_engine_cs *engine)
>   
>   static int do_engine_reset(struct intel_engine_cs *engine)
>   {
> -	return intel_engine_reset(engine, "live_workarounds");
> +	return intel_gt_engine_recover(engine, "live_workarounds");
>   }
>   
>   static int do_guc_reset(struct intel_engine_cs *engine)
> @@ -1282,7 +1282,7 @@ live_engine_reset_workarounds(void *arg)
>   				goto err;
>   			}
>   
> -			ret = intel_engine_reset(engine, "live_workarounds:idle");
> +			ret = intel_gt_engine_recover(engine, "live_workarounds:idle");
>   			if (ret) {
>   				pr_err("%s: Reset failed while idle\n", engine->name);
>   				goto err;
> @@ -1320,7 +1320,7 @@ live_engine_reset_workarounds(void *arg)
>   		}
>   
>   		if (!using_guc) {
> -			ret = intel_engine_reset(engine, "live_workarounds:active");
> +			ret = intel_gt_engine_recover(engine, "live_workarounds:active");
>   			if (ret) {
>   				pr_err("%s: Reset failed on an active spinner\n",
>   				       engine->name);


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled
  2024-04-18 17:10 ` [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled Nirmoy Das
@ 2024-04-18 23:38   ` John Harrison
  2024-04-19  9:46     ` Nirmoy Das
  0 siblings, 1 reply; 13+ messages in thread
From: John Harrison @ 2024-04-18 23:38 UTC (permalink / raw)
  To: Nirmoy Das, intel-gfx; +Cc: dri-devel

On 4/18/2024 10:10, Nirmoy Das wrote:
> Currently intel_gt_reset() happens as follows:
>
> reset_prepare() ---> Sends GDRST to GuC, GuC is in GS_MIA_IN_RESET
> do_reset()
>    intel_gt_reset_all_engines()
>      *_engine_reset_prepare() -->RESET_CTL expects running GuC
Not technically correct. There is no direct connection between RESET_CTL 
and GuC.

>      *_reset_engines()
> intel_gt_init_hw() --> GuC comes out of GS_MIA_IN_RESET with FW loaded.
>
> Fix the issue by sanitizing the GuC only after resetting requested
> engines and before intel_gt_init_hw().
You never actually state what the issue is.

The problem is that there is a dedicated CSB FIFO going to GuC (and 
nothing else has access to it). If that FIFO fills up, the hardware will 
block on the next context switch until there is space. If no-one (i.e. 
GuC) is draining it, that means the system is effectively hung. If an 
engine is reset whilst actively executing a context, a CSB entry will be 
sent to say that the context has gone idle. Thus if you reset a very 
busy system and start with killing GuC before killing the engines and 
only then re-enabling GuC, you run the risk of generating more CSB 
entries than will fit in the FIFO and deadlocking. Whereas, if the 
system is idle then you can reset the engines as much as you like while 
GuC is dead and it won't be a problem.

>
> Note intel_uc_reset_finish() and intel_uc_reset() are nop when
> guc submission is disabled.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_reset.c | 16 ++++++++++++++--
>   1 file changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 6504e8ba9c58..bd166f5aca4b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -907,8 +907,17 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
>   	intel_engine_mask_t awake = 0;
>   	enum intel_engine_id id;
>   
> -	/* For GuC mode, ensure submission is disabled before stopping ring */
> -	intel_uc_reset_prepare(&gt->uc);
> +	/**
> +	 * For GuC mode with submission enabled, ensure submission
> +	 * is disabled before stopping ring.
> +	 *
> +	 * For GuC mode with submission disabled, ensure that GuC is not
> +	 * sanitized, do that at the end in reset_finish(). reset_prepare()
> +	 * is followed by engine reset which in this mode requires GuC to
> +	 * be functional to process engine reset events.
-> to process any CSB FIFO entries generated by the resets.

John.

> +	 */
> +	if (intel_uc_uses_guc_submission(&gt->uc))
> +		intel_uc_reset_prepare(&gt->uc);
>   
>   	for_each_engine(engine, gt, id) {
>   		if (intel_engine_pm_get_if_awake(engine))
> @@ -1255,6 +1264,9 @@ void intel_gt_reset(struct intel_gt *gt,
>   
>   	intel_overlay_reset(gt->i915);
>   
> +	/* sanitize uC after engine reset */
> +	if (!intel_uc_uses_guc_submission(&gt->uc))
> +		intel_uc_reset_prepare(&gt->uc);
>   	/*
>   	 * Next we need to restore the context, but we don't use those
>   	 * yet either...


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset()
  2024-04-18 23:27 ` [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() John Harrison
@ 2024-04-19  8:44   ` Nirmoy Das
  0 siblings, 0 replies; 13+ messages in thread
From: Nirmoy Das @ 2024-04-19  8:44 UTC (permalink / raw)
  To: John Harrison, intel-gfx; +Cc: dri-devel

Hi John.

On 4/19/2024 1:27 AM, John Harrison wrote:
> On 4/18/2024 10:10, Nirmoy Das wrote:
>> __intel_gt_reset() is really for resetting engines though
>> the name might suggest something else. So add two helper functions
>> to remove confusions with no functional changes.
> Technically you only added one and just moved the other :). It already 
> existed, it just wasn't being used everywhere that it could be!

I did have one more helper functions but I removed it in favor of 
intel_gt_reset_engine() but didn't change the commit message :/

Thanks for catching it. I will fix it.

>
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +-
>>   .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
>>   drivers/gpu/drm/i915/gt/intel_gt.c            |  2 +-
>>   drivers/gpu/drm/i915/gt/intel_gt_pm.c         |  2 +-
>>   drivers/gpu/drm/i915/gt/intel_reset.c         | 43 ++++++++++++++-----
>>   drivers/gpu/drm/i915/gt/intel_reset.h         |  3 +-
>>   drivers/gpu/drm/i915/gt/selftest_reset.c      |  2 +-
>>   drivers/gpu/drm/i915/i915_driver.c            |  2 +-
>>   8 files changed, 41 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
>> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> index 8c44af1c3451..5c8e9ee3b008 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> @@ -678,7 +678,7 @@ void intel_engines_release(struct intel_gt *gt)
>>        */
>>       GEM_BUG_ON(intel_gt_pm_is_awake(gt));
>>       if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
>> -        __intel_gt_reset(gt, ALL_ENGINES);
>> +        intel_gt_reset_all_engines(gt);
>>         /* Decouple the backend; but keep the layout for late GPU 
>> resets */
>>       for_each_engine(engine, gt, id) {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
>> b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index 355aab5b38ba..21829439e686 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -2898,7 +2898,7 @@ static void enable_error_interrupt(struct 
>> intel_engine_cs *engine)
>>           drm_err(&engine->i915->drm,
>>               "engine '%s' resumed still in error: %08x\n",
>>               engine->name, status);
>> -        __intel_gt_reset(engine->gt, engine->mask);
>> +        intel_gt_reset_engine(engine);
>>       }
>>         /*
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>> index 580b5141ce1e..626b166e67ef 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>> @@ -832,7 +832,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
>>         /* Scrub all HW state upon release */
>>       with_intel_runtime_pm(gt->uncore->rpm, wakeref)
>> -        __intel_gt_reset(gt, ALL_ENGINES);
>> +        intel_gt_reset_all_engines(gt);
>>   }
>>     void intel_gt_driver_release(struct intel_gt *gt)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c 
>> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>> index 220ac4f92edf..c08fdb65cc69 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
>> @@ -159,7 +159,7 @@ static bool reset_engines(struct intel_gt *gt)
>>       if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
>>           return false;
>>   -    return __intel_gt_reset(gt, ALL_ENGINES) == 0;
>> +    return intel_gt_reset_all_engines(gt) == 0;
>>   }
>>     static void gt_sanitize(struct intel_gt *gt, bool force)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
>> b/drivers/gpu/drm/i915/gt/intel_reset.c
>> index c8e9aa41fdea..b825daace58e 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
>> @@ -764,7 +764,7 @@ wa_14015076503_end(struct intel_gt *gt, 
>> intel_engine_mask_t engine_mask)
>>                HECI_H_GS1_ER_PREP, 0);
>>   }
>>   -int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t 
>> engine_mask)
>> +static int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t 
>> engine_mask)
>>   {
>>       const int retries = engine_mask == ALL_ENGINES ? 
>> RESET_MAX_RETRIES : 1;
>>       reset_func reset;
>> @@ -795,6 +795,34 @@ int __intel_gt_reset(struct intel_gt *gt, 
>> intel_engine_mask_t engine_mask)
>>       return ret;
>>   }
>>   +/**
>> + * intel_gt_reset_all_engines() - Reset all engines in the given gt.
>> + * @gt: the GT to reset all engines for.
>> + *
>> + * This function resets all engines within the given gt.
>> + *
>> + * Returns:
>> + * Zero on success, negative error code on failure.
>> + */
>> +int intel_gt_reset_all_engines(struct intel_gt *gt)
>> +{
>> +    return __intel_gt_reset(gt, ALL_ENGINES);
>> +}
>> +
>> +/**
>> + * intel_gt_reset_engine() - Reset a specific engine within a gt.
>> + * @engine: engine to be reset.
>> + *
>> + * This function resets the specified engine within a gt.
>> + *
>> + * Returns:
>> + * Zero on success, negative error code on failure.
>> + */
>> +int intel_gt_reset_engine(struct intel_engine_cs *engine)
>> +{
>> +    return __intel_gt_reset(engine->gt, engine->mask);
>> +}
>> +
> You could have just dropped the 'static' from the existing copy of 
> this function and added the new version next to it. That would make 
> the diff simpler and therefore clearer. Unless you think there is a 
> good reason to move the code up here?

Yes, make sense, I will do that.


Thanks,

Nirmoy

>
> John.
>
>>   bool intel_has_gpu_reset(const struct intel_gt *gt)
>>   {
>>       if (!gt->i915->params.reset)
>> @@ -978,7 +1006,7 @@ static void __intel_gt_set_wedged(struct 
>> intel_gt *gt)
>>         /* Even if the GPU reset fails, it should still stop the 
>> engines */
>>       if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
>> -        __intel_gt_reset(gt, ALL_ENGINES);
>> +        intel_gt_reset_all_engines(gt);
>>         for_each_engine(engine, gt, id)
>>           engine->submit_request = nop_submit_request;
>> @@ -1089,7 +1117,7 @@ static bool __intel_gt_unset_wedged(struct 
>> intel_gt *gt)
>>       /* We must reset pending GPU events before restoring our 
>> submission */
>>       ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism 
>> desired */
>>       if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
>> -        ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
>> +        ok = intel_gt_reset_all_engines(gt) == 0;
>>       if (!ok) {
>>           /*
>>            * Warn CI about the unrecoverable wedged condition.
>> @@ -1133,10 +1161,10 @@ static int do_reset(struct intel_gt *gt, 
>> intel_engine_mask_t stalled_mask)
>>   {
>>       int err, i;
>>   -    err = __intel_gt_reset(gt, ALL_ENGINES);
>> +    err = intel_gt_reset_all_engines(gt);
>>       for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
>>           msleep(10 * (i + 1));
>> -        err = __intel_gt_reset(gt, ALL_ENGINES);
>> +        err = intel_gt_reset_all_engines(gt);
>>       }
>>       if (err)
>>           return err;
>> @@ -1270,11 +1298,6 @@ void intel_gt_reset(struct intel_gt *gt,
>>       goto finish;
>>   }
>>   -static int intel_gt_reset_engine(struct intel_engine_cs *engine)
>> -{
>> -    return __intel_gt_reset(engine->gt, engine->mask);
>> -}
>> -
>>   int __intel_engine_reset_bh(struct intel_engine_cs *engine, const 
>> char *msg)
>>   {
>>       struct intel_gt *gt = engine->gt;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h 
>> b/drivers/gpu/drm/i915/gt/intel_reset.h
>> index f615b30b81c5..c00de353075c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
>> @@ -54,7 +54,8 @@ int intel_gt_terminally_wedged(struct intel_gt *gt);
>>   void intel_gt_set_wedged_on_init(struct intel_gt *gt);
>>   void intel_gt_set_wedged_on_fini(struct intel_gt *gt);
>>   -int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t 
>> engine_mask);
>> +int intel_gt_reset_engine(struct intel_engine_cs *engine);
>> +int intel_gt_reset_all_engines(struct intel_gt *gt);
>>     int intel_reset_guc(struct intel_gt *gt);
>>   diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
>> b/drivers/gpu/drm/i915/gt/selftest_reset.c
>> index f40de408cd3a..2cfc23c58e90 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
>> @@ -281,7 +281,7 @@ static int igt_atomic_reset(void *arg)
>>           awake = reset_prepare(gt);
>>           p->critical_section_begin();
>>   -        err = __intel_gt_reset(gt, ALL_ENGINES);
>> +        err = intel_gt_reset_all_engines(gt);
>>             p->critical_section_end();
>>           reset_finish(gt, awake);
>> diff --git a/drivers/gpu/drm/i915/i915_driver.c 
>> b/drivers/gpu/drm/i915/i915_driver.c
>> index 4b9233c07a22..622a24305bc2 100644
>> --- a/drivers/gpu/drm/i915/i915_driver.c
>> +++ b/drivers/gpu/drm/i915/i915_driver.c
>> @@ -202,7 +202,7 @@ static void sanitize_gpu(struct drm_i915_private 
>> *i915)
>>           unsigned int i;
>>             for_each_gt(gt, i915, i)
>> -            __intel_gt_reset(gt, ALL_ENGINES);
>> +            intel_gt_reset_all_engines(gt);
>>       }
>>   }
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover
  2024-04-18 23:27   ` John Harrison
@ 2024-04-19  8:48     ` Nirmoy Das
  0 siblings, 0 replies; 13+ messages in thread
From: Nirmoy Das @ 2024-04-19  8:48 UTC (permalink / raw)
  To: John Harrison, intel-gfx; +Cc: dri-devel

Hi John,

On 4/19/2024 1:27 AM, John Harrison wrote:
> On 4/18/2024 10:10, Nirmoy Das wrote:
>> intel_engine_reset() not only reset a engine but also
>> tries to recover it so give it a proper name without
>> any functional changes.
> Not seeing what the difference is. If this was a super low level 
> function (with an __ prefix for example) then one might expect it to 
> literally just poke the reset register and leave the engine in a dead 
> state. But as a high level function, I think it is reasonable to 
> expect a reset function to 'recover' the entity being reset.
>
> Also, many of the callers are tests that are explicitly testing reset. 
> So now the tests all talk about attempting resets, resets failing, 
> etc. but around a call to 'recover' instead of 'reset', which seems 
> confusing.


Didn't think about it, I will drop it.


Thanks,

Nirmoy

>
> John.
>
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> ---
>>   .../drm/i915/gem/selftests/i915_gem_context.c |  2 +-
>>   .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
>>   drivers/gpu/drm/i915/gt/intel_reset.c         |  4 ++--
>>   drivers/gpu/drm/i915/gt/intel_reset.h         |  4 ++--
>>   drivers/gpu/drm/i915/gt/selftest_hangcheck.c  | 20 +++++++++----------
>>   drivers/gpu/drm/i915/gt/selftest_mocs.c       |  4 ++--
>>   drivers/gpu/drm/i915/gt/selftest_reset.c      |  2 +-
>>   .../gpu/drm/i915/gt/selftest_workarounds.c    |  6 +++---
>>   8 files changed, 22 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c 
>> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>> index 89d4dc8b60c6..4f4cde55f621 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>> @@ -1171,7 +1171,7 @@ __sseu_finish(const char *name,
>>       int ret = 0;
>>         if (flags & TEST_RESET) {
>> -        ret = intel_engine_reset(ce->engine, "sseu");
>> +        ret = intel_gt_engine_recover(ce->engine, "sseu");
>>           if (ret)
>>               goto out;
>>       }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
>> b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index 21829439e686..9485a622a704 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -2404,7 +2404,7 @@ static void execlists_reset(struct 
>> intel_engine_cs *engine, const char *msg)
>>         ring_set_paused(engine, 1); /* Freeze the current request in 
>> place */
>>       execlists_capture(engine);
>> -    intel_engine_reset(engine, msg);
>> +    intel_gt_engine_recover(engine, msg);
>> tasklet_enable(&engine->sched_engine->tasklet);
>>       clear_and_wake_up_bit(bit, lock);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
>> b/drivers/gpu/drm/i915/gt/intel_reset.c
>> index b825daace58e..6504e8ba9c58 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
>> @@ -1348,7 +1348,7 @@ int __intel_engine_reset_bh(struct 
>> intel_engine_cs *engine, const char *msg)
>>   }
>>     /**
>> - * intel_engine_reset - reset GPU engine to recover from a hang
>> + * intel_gt_engine_recover - reset GPU engine to recover from a hang
>>    * @engine: engine to reset
>>    * @msg: reason for GPU reset; or NULL for no drm_notice()
>>    *
>> @@ -1360,7 +1360,7 @@ int __intel_engine_reset_bh(struct 
>> intel_engine_cs *engine, const char *msg)
>>    *  - reset engine (which will force the engine to idle)
>>    *  - re-init/configure engine
>>    */
>> -int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
>> +int intel_gt_engine_recover(struct intel_engine_cs *engine, const 
>> char *msg)
>>   {
>>       int err;
>>   diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h 
>> b/drivers/gpu/drm/i915/gt/intel_reset.h
>> index c00de353075c..be984357bf27 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.h
>> @@ -31,8 +31,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
>>   void intel_gt_reset(struct intel_gt *gt,
>>               intel_engine_mask_t stalled_mask,
>>               const char *reason);
>> -int intel_engine_reset(struct intel_engine_cs *engine,
>> -               const char *reason);
>> +int intel_gt_engine_recover(struct intel_engine_cs *engine,
>> +                const char *reason);
>>   int __intel_engine_reset_bh(struct intel_engine_cs *engine,
>>                   const char *reason);
>>   diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c 
>> b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
>> index 9ce8ff1c04fe..9bfda3f2bd24 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
>> @@ -495,9 +495,9 @@ static int igt_reset_nop_engine(void *arg)
>>                     i915_request_add(rq);
>>               }
>> -            err = intel_engine_reset(engine, NULL);
>> +            err = intel_gt_engine_recover(engine, NULL);
>>               if (err) {
>> -                pr_err("intel_engine_reset(%s) failed, err:%d\n",
>> +                pr_err("intel_gt_engine_recover(%s) failed, err:%d\n",
>>                          engine->name, err);
>>                   break;
>>               }
>> @@ -574,7 +574,7 @@ static int igt_reset_fail_engine(void *arg)
>>                           &gt->reset.flags));
>>             force_reset_timeout(engine);
>> -        err = intel_engine_reset(engine, NULL);
>> +        err = intel_gt_engine_recover(engine, NULL);
>>           cancel_reset_timeout(engine);
>>           if (err == 0) /* timeouts only generated on gen8+ */
>>               goto skip;
>> @@ -623,9 +623,9 @@ static int igt_reset_fail_engine(void *arg)
>>               }
>>                 if (count & 1) {
>> -                err = intel_engine_reset(engine, NULL);
>> +                err = intel_gt_engine_recover(engine, NULL);
>>                   if (err) {
>> -                    GEM_TRACE_ERR("intel_engine_reset(%s) failed, 
>> err:%d\n",
>> +                    GEM_TRACE_ERR("intel_gt_engine_recover(%s) 
>> failed, err:%d\n",
>>                                 engine->name, err);
>>                       GEM_TRACE_DUMP();
>>                       i915_request_put(last);
>> @@ -633,10 +633,10 @@ static int igt_reset_fail_engine(void *arg)
>>                   }
>>               } else {
>>                   force_reset_timeout(engine);
>> -                err = intel_engine_reset(engine, NULL);
>> +                err = intel_gt_engine_recover(engine, NULL);
>>                   cancel_reset_timeout(engine);
>>                   if (err != -ETIMEDOUT) {
>> -                    pr_err("intel_engine_reset(%s) did not fail, 
>> err:%d\n",
>> +                    pr_err("intel_gt_engine_recover(%s) did not 
>> fail, err:%d\n",
>>                              engine->name, err);
>>                       i915_request_put(last);
>>                       break;
>> @@ -765,9 +765,9 @@ static int __igt_reset_engine(struct intel_gt 
>> *gt, bool active)
>>               }
>>                 if (!using_guc) {
>> -                err = intel_engine_reset(engine, NULL);
>> +                err = intel_gt_engine_recover(engine, NULL);
>>                   if (err) {
>> -                    pr_err("intel_engine_reset(%s) failed, err:%d\n",
>> +                    pr_err("intel_gt_engine_recover(%s) failed, 
>> err:%d\n",
>>                              engine->name, err);
>>                       goto skip;
>>                   }
>> @@ -1085,7 +1085,7 @@ static int __igt_reset_engines(struct intel_gt 
>> *gt,
>>               }
>>                 if (!using_guc) {
>> -                err = intel_engine_reset(engine, NULL);
>> +                err = intel_gt_engine_recover(engine, NULL);
>>                   if (err) {
>>                       pr_err("i915_reset_engine(%s:%s): failed, 
>> err=%d\n",
>>                              engine->name, test_name, err);
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c 
>> b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> index d73e438fb85f..b7b15dd3163f 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> @@ -336,7 +336,7 @@ static int active_engine_reset(struct 
>> intel_context *ce,
>>         err = request_add_spin(rq, &spin);
>>       if (err == 0 && !using_guc)
>> -        err = intel_engine_reset(ce->engine, reason);
>> +        err = intel_gt_engine_recover(ce->engine, reason);
>>         /* Ensure the reset happens and kills the engine */
>>       if (err == 0)
>> @@ -356,7 +356,7 @@ static int __live_mocs_reset(struct live_mocs *mocs,
>>         if (intel_has_reset_engine(gt)) {
>>           if (!using_guc) {
>> -            err = intel_engine_reset(ce->engine, "mocs");
>> +            err = intel_gt_engine_recover(ce->engine, "mocs");
>>               if (err)
>>                   return err;
>>   diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c 
>> b/drivers/gpu/drm/i915/gt/selftest_reset.c
>> index 2cfc23c58e90..9eaa1aed9f58 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
>> @@ -115,7 +115,7 @@ __igt_reset_stolen(struct intel_gt *gt,
>>       } else {
>>           for_each_engine(engine, gt, id) {
>>               if (mask & engine->mask)
>> -                intel_engine_reset(engine, NULL);
>> +                intel_gt_engine_recover(engine, NULL);
>>           }
>>       }
>>   diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c 
>> b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> index 14a8b25b6204..eb7516c7cb56 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> @@ -256,7 +256,7 @@ static int do_device_reset(struct intel_engine_cs 
>> *engine)
>>     static int do_engine_reset(struct intel_engine_cs *engine)
>>   {
>> -    return intel_engine_reset(engine, "live_workarounds");
>> +    return intel_gt_engine_recover(engine, "live_workarounds");
>>   }
>>     static int do_guc_reset(struct intel_engine_cs *engine)
>> @@ -1282,7 +1282,7 @@ live_engine_reset_workarounds(void *arg)
>>                   goto err;
>>               }
>>   -            ret = intel_engine_reset(engine, 
>> "live_workarounds:idle");
>> +            ret = intel_gt_engine_recover(engine, 
>> "live_workarounds:idle");
>>               if (ret) {
>>                   pr_err("%s: Reset failed while idle\n", engine->name);
>>                   goto err;
>> @@ -1320,7 +1320,7 @@ live_engine_reset_workarounds(void *arg)
>>           }
>>             if (!using_guc) {
>> -            ret = intel_engine_reset(engine, 
>> "live_workarounds:active");
>> +            ret = intel_gt_engine_recover(engine, 
>> "live_workarounds:active");
>>               if (ret) {
>>                   pr_err("%s: Reset failed on an active spinner\n",
>>                          engine->name);
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled
  2024-04-18 23:38   ` John Harrison
@ 2024-04-19  9:46     ` Nirmoy Das
  0 siblings, 0 replies; 13+ messages in thread
From: Nirmoy Das @ 2024-04-19  9:46 UTC (permalink / raw)
  To: John Harrison, intel-gfx; +Cc: dri-devel

Hi John,

On 4/19/2024 1:38 AM, John Harrison wrote:
> On 4/18/2024 10:10, Nirmoy Das wrote:
>> Currently intel_gt_reset() happens as follows:
>>
>> reset_prepare() ---> Sends GDRST to GuC, GuC is in GS_MIA_IN_RESET
>> do_reset()
>>    intel_gt_reset_all_engines()
>>      *_engine_reset_prepare() -->RESET_CTL expects running GuC
> Not technically correct. There is no direct connection between 
> RESET_CTL and GuC.
>
>>      *_reset_engines()
>> intel_gt_init_hw() --> GuC comes out of GS_MIA_IN_RESET with FW loaded.
>>
>> Fix the issue by sanitizing the GuC only after resetting requested
>> engines and before intel_gt_init_hw().
> You never actually state what the issue is.
>
> The problem is that there is a dedicated CSB FIFO going to GuC (and 
> nothing else has access to it). If that FIFO fills up, the hardware 
> will block on the next context switch until there is space. If no-one 
> (i.e. GuC) is draining it, that means the system is effectively hung. 
> If an engine is reset whilst actively executing a context, a CSB entry 
> will be sent to say that the context has gone idle. Thus if you reset 
> a very busy system and start with killing GuC before killing the 
> engines and only then re-enabling GuC, you run the risk of generating 
> more CSB entries than will fit in the FIFO and deadlocking. Whereas, 
> if the system is idle then you can reset the engines as much as you 
> like while GuC is dead and it won't be a problem.


I wasn't sure if I could talk about internal details so kept it to 
minimal. I will borrow above explanation and resend :)

>
>>
>> Note intel_uc_reset_finish() and intel_uc_reset() are nop when
>> guc submission is disabled.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_reset.c | 16 ++++++++++++++--
>>   1 file changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
>> b/drivers/gpu/drm/i915/gt/intel_reset.c
>> index 6504e8ba9c58..bd166f5aca4b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
>> @@ -907,8 +907,17 @@ static intel_engine_mask_t reset_prepare(struct 
>> intel_gt *gt)
>>       intel_engine_mask_t awake = 0;
>>       enum intel_engine_id id;
>>   -    /* For GuC mode, ensure submission is disabled before stopping 
>> ring */
>> -    intel_uc_reset_prepare(&gt->uc);
>> +    /**
>> +     * For GuC mode with submission enabled, ensure submission
>> +     * is disabled before stopping ring.
>> +     *
>> +     * For GuC mode with submission disabled, ensure that GuC is not
>> +     * sanitized, do that at the end in reset_finish(). reset_prepare()
>> +     * is followed by engine reset which in this mode requires GuC to
>> +     * be functional to process engine reset events.
> -> to process any CSB FIFO entries generated by the resets.

I will add this.


Thanks,

Nirmoy

>
> John.
>
>> +     */
>> +    if (intel_uc_uses_guc_submission(&gt->uc))
>> +        intel_uc_reset_prepare(&gt->uc);
>>         for_each_engine(engine, gt, id) {
>>           if (intel_engine_pm_get_if_awake(engine))
>> @@ -1255,6 +1264,9 @@ void intel_gt_reset(struct intel_gt *gt,
>>         intel_overlay_reset(gt->i915);
>>   +    /* sanitize uC after engine reset */
>> +    if (!intel_uc_uses_guc_submission(&gt->uc))
>> +        intel_uc_reset_prepare(&gt->uc);
>>       /*
>>        * Next we need to restore the context, but we don't use those
>>        * yet either...
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2024-04-19  9:47 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-18 17:10 [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() Nirmoy Das
2024-04-18 17:10 ` [PATCH 2/3] drm/i915 Rename intel_engine_reset to intel_gt_engine_recover Nirmoy Das
2024-04-18 23:27   ` John Harrison
2024-04-19  8:48     ` Nirmoy Das
2024-04-18 17:10 ` [PATCH 3/3] drm/i915: Fix gt reset with GuC submission disabled Nirmoy Das
2024-04-18 23:38   ` John Harrison
2024-04-19  9:46     ` Nirmoy Das
2024-04-18 20:09 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() Patchwork
2024-04-18 20:16 ` ✗ Fi.CI.BAT: failure " Patchwork
2024-04-18 21:46 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Refactor confusing __intel_gt_reset() (rev2) Patchwork
2024-04-18 21:56 ` ✗ Fi.CI.BAT: failure " Patchwork
2024-04-18 23:27 ` [PATCH 1/3] drm/i915: Refactor confusing __intel_gt_reset() John Harrison
2024-04-19  8:44   ` Nirmoy Das

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.