intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts
@ 2022-02-24 13:03 Tvrtko Ursulin
  2022-02-24 23:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Be more gentle when exiting non-persistent contexts (rev3) Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2022-02-24 13:03 UTC (permalink / raw)
  To: Intel-gfx; +Cc: dri-devel, Zhen Han, Chris Wilson

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the leftover job
has to preempt before we issue an engine reset.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferable.

To achieve this the patch splits handling of banned contexts from simply
closed non-persistent ones, applies a different timeout to each, and
also extends the criteria which determine if a request should be
scheduled back in after preemption or not.

A 20ms preempt timeout grace is given to exited non-persistent contexts,
which has been empirically tested to satisfy customer requirements
while still providing reasonably quick cleanup post exit.

v2:
 * Streamline fast path checks.

v3:
 * Simplify by using only schedulable status.
 * Increase timeout to 20ms.

v4:
 * Fix live_execlists selftest.

v5:
 * Fix logic in kill_engines.

v6:
 * Rebase.

v7:
 * Add GuC support.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +++++++++++-----
 drivers/gpu/drm/i915/gt/intel_context.c       | 25 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context.h       | 26 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_context_types.h |  4 ++-
 .../drm/i915/gt/intel_execlists_submission.c  | 13 +++++++---
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 15 ++++++-----
 drivers/gpu/drm/i915/i915_request.c           |  2 +-
 8 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index bc6d59df064d..3a61ec753894 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1332,7 +1332,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1346,8 +1347,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool skip = false;
 
-		if (ban && intel_context_ban(ce, NULL))
+		if (ban)
+			skip = intel_context_ban(ce, NULL);
+		else if (!persistent)
+			skip = intel_context_exit_nonpersistent(ce, NULL);
+
+		/* Already banned or non-persistent closed. */
+		if (skip)
 			continue;
 
 		/*
@@ -1360,7 +1368,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -1372,8 +1380,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -1386,7 +1392,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -1432,7 +1439,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5d0ec7c49b6a..27cd71c13097 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -576,6 +576,31 @@ void intel_context_bind_parent_child(struct intel_context *parent,
 	child->parallel.parent = parent;
 }
 
+bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
+{
+	bool ret = intel_context_set_banned(ce);
+
+	trace_intel_context_ban(ce);
+
+	if (ce->ops->revoke)
+		ce->ops->revoke(ce, rq,
+				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);
+
+	return ret;
+}
+
+bool intel_context_exit_nonpersistent(struct intel_context *ce,
+				      struct i915_request *rq)
+{
+	bool ret = intel_context_set_exiting(ce);
+
+	if (ce->ops->revoke)
+		ce->ops->revoke(ce, rq,
+				INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS);
+
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index d8c74bbf9aae..11c95e9d76ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -25,6 +25,9 @@
 		     ##__VA_ARGS__);					\
 } while (0)
 
+#define INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS (20)
+#define INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS  (1)
+
 struct i915_gem_ww_ctx;
 
 void intel_context_init(struct intel_context *ce,
@@ -309,18 +312,27 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
-static inline bool intel_context_ban(struct intel_context *ce,
-				     struct i915_request *rq)
+bool intel_context_ban(struct intel_context *ce, struct i915_request *rq);
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
 {
-	bool ret = intel_context_set_banned(ce);
+	return !test_bit(CONTEXT_EXITING, &ce->flags) &&
+	       !test_bit(CONTEXT_BANNED, &ce->flags);
+}
 
-	trace_intel_context_ban(ce);
-	if (ce->ops->ban)
-		ce->ops->ban(ce, rq);
+static inline bool intel_context_is_exiting(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_EXITING, &ce->flags);
+}
 
-	return ret;
+static inline bool intel_context_set_exiting(struct intel_context *ce)
+{
+	return test_and_set_bit(CONTEXT_EXITING, &ce->flags);
 }
 
+bool intel_context_exit_nonpersistent(struct intel_context *ce,
+				      struct i915_request *rq);
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 30cd81ad8911..34073430cf8a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -37,7 +37,8 @@ struct intel_context_ops {
 
 	int (*alloc)(struct intel_context *ce);
 
-	void (*ban)(struct intel_context *ce, struct i915_request *rq);
+	void (*revoke)(struct intel_context *ce, struct i915_request *rq,
+		       unsigned int preempt_timeout_ms);
 
 	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
 	int (*pin)(struct intel_context *ce, void *vaddr);
@@ -119,6 +120,7 @@ struct intel_context {
 #define CONTEXT_GUC_INIT		10
 #define CONTEXT_PERMA_PIN		11
 #define CONTEXT_IS_PARKING		12
+#define CONTEXT_EXITING			13
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 961d795220a3..953d1e7453c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -481,7 +481,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1225,12 +1225,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
-		return 1;
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+		return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(intel_context_is_exiting(ce)))
+		return INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 6d7ec3bf1f32..a3dcc227a702 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -596,8 +596,9 @@ static void ring_context_reset(struct intel_context *ce)
 	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
 }
 
-static void ring_context_ban(struct intel_context *ce,
-			     struct i915_request *rq)
+static void ring_context_revoke(struct intel_context *ce,
+				struct i915_request *rq,
+				unsigned int preempt_timeout_ms)
 {
 	struct intel_engine_cs *engine;
 
@@ -632,7 +633,7 @@ static const struct intel_context_ops ring_context_ops = {
 
 	.cancel_request = ring_context_cancel_request,
 
-	.ban = ring_context_ban,
+	.revoke = ring_context_revoke,
 
 	.pre_pin = ring_context_pre_pin,
 	.pin = ring_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b3a429a92c0d..23fdbc7b34ab 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2602,7 +2602,9 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
 	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
 }
 
-static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
+static void
+guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
+		   unsigned int preempt_timeout_ms)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 	struct intel_runtime_pm *runtime_pm =
@@ -2641,7 +2643,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
 		 * gets kicked off the HW ASAP.
 		 */
 		with_intel_runtime_pm(runtime_pm, wakeref) {
-			__guc_context_set_preemption_timeout(guc, guc_id, 1);
+			__guc_context_set_preemption_timeout(guc, guc_id,
+							     preempt_timeout_ms);
 			__guc_context_sched_disable(guc, ce, guc_id);
 		}
 	} else {
@@ -2649,7 +2652,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
 			with_intel_runtime_pm(runtime_pm, wakeref)
 				__guc_context_set_preemption_timeout(guc,
 								     ce->guc_id.id,
-								     1);
+								     preempt_timeout_ms);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 	}
 }
@@ -2998,7 +3001,7 @@ static const struct intel_context_ops guc_context_ops = {
 	.unpin = guc_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
@@ -3247,7 +3250,7 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 	.unpin = guc_virtual_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
@@ -3336,7 +3339,7 @@ static const struct intel_context_ops virtual_parent_context_ops = {
 	.unpin = guc_parent_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 582770360ad1..13c555d190a0 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -611,7 +611,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Be more gentle when exiting non-persistent contexts (rev3)
  2022-02-24 13:03 [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin
@ 2022-02-24 23:37 ` Patchwork
  2022-02-25  0:05 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2022-02-25 15:29 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2022-02-24 23:37 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Be more gentle when exiting non-persistent contexts (rev3)
URL   : https://patchwork.freedesktop.org/series/93420/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Be more gentle when exiting non-persistent contexts (rev3)
  2022-02-24 13:03 [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin
  2022-02-24 23:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Be more gentle when exiting non-persistent contexts (rev3) Patchwork
@ 2022-02-25  0:05 ` Patchwork
  2022-02-25 15:29 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2022-02-25  0:05 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 5804 bytes --]

== Series Details ==

Series: drm/i915: Be more gentle when exiting non-persistent contexts (rev3)
URL   : https://patchwork.freedesktop.org/series/93420/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_11283 -> Patchwork_22399
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/index.html

Participating hosts (42 -> 39)
------------------------------

  Additional (2): fi-cml-u2 fi-pnv-d510 
  Missing    (5): fi-kbl-soraka fi-bxt-dsi bat-dg1-5 fi-bsw-cyan fi-bdw-samus 

Known issues
------------

  Here are the changes found in Patchwork_22399 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@memory-alloc:
    - fi-cml-u2:          NOTRUN -> [SKIP][1] ([fdo#109315]) +17 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@amdgpu/amd_basic@memory-alloc.html

  * igt@gem_exec_fence@basic-busy@bcs0:
    - fi-cml-u2:          NOTRUN -> [SKIP][2] ([i915#1208]) +1 similar issue
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@gem_exec_fence@basic-busy@bcs0.html

  * igt@gem_huc_copy@huc-copy:
    - fi-cml-u2:          NOTRUN -> [SKIP][3] ([i915#2190])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@parallel-random-engines:
    - fi-cml-u2:          NOTRUN -> [SKIP][4] ([i915#4613]) +3 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@gem_lmem_swapping@parallel-random-engines.html

  * igt@kms_chamelium@dp-hpd-fast:
    - fi-cml-u2:          NOTRUN -> [SKIP][5] ([fdo#109284] / [fdo#111827]) +8 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@kms_chamelium@dp-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - fi-cml-u2:          NOTRUN -> [SKIP][6] ([fdo#109278]) +1 similar issue
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_force_connector_basic@force-load-detect:
    - fi-cml-u2:          NOTRUN -> [SKIP][7] ([fdo#109285])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@kms_force_connector_basic@force-load-detect.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-cml-u2:          NOTRUN -> [SKIP][8] ([fdo#109278] / [i915#533])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_psr@cursor_plane_move:
    - fi-skl-6600u:       NOTRUN -> [INCOMPLETE][9] ([i915#636])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-skl-6600u/igt@kms_psr@cursor_plane_move.html

  * igt@prime_vgem@basic-userptr:
    - fi-pnv-d510:        NOTRUN -> [SKIP][10] ([fdo#109271]) +57 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-pnv-d510/igt@prime_vgem@basic-userptr.html
    - fi-cml-u2:          NOTRUN -> [SKIP][11] ([i915#3301])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-cml-u2/igt@prime_vgem@basic-userptr.html

  
#### Possible fixes ####

  * igt@kms_busy@basic@modeset:
    - {bat-adlp-6}:       [DMESG-WARN][12] ([i915#3576]) -> [PASS][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/bat-adlp-6/igt@kms_busy@basic@modeset.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/bat-adlp-6/igt@kms_busy@basic@modeset.html

  * igt@kms_psr@primary_page_flip:
    - fi-skl-6600u:       [INCOMPLETE][14] ([i915#4838]) -> [PASS][15]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/fi-skl-6600u/igt@kms_psr@primary_page_flip.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/fi-skl-6600u/igt@kms_psr@primary_page_flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1208]: https://gitlab.freedesktop.org/drm/intel/issues/1208
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#3301]: https://gitlab.freedesktop.org/drm/intel/issues/3301
  [i915#3576]: https://gitlab.freedesktop.org/drm/intel/issues/3576
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4838]: https://gitlab.freedesktop.org/drm/intel/issues/4838
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#636]: https://gitlab.freedesktop.org/drm/intel/issues/636


Build changes
-------------

  * Linux: CI_DRM_11283 -> Patchwork_22399

  CI-20190529: 20190529
  CI_DRM_11283: 5d17a0d32cc09a2f2b20d2aeb1cadf7f2d6e13d2 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6357: 6546304ecf053b9c5ec278ee3c210d2c6d50a3a6 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_22399: 6dff236e3618eb3664dc779a2ee8c57685da471e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

6dff236e3618 drm/i915: Be more gentle when exiting non-persistent contexts

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/index.html

[-- Attachment #2: Type: text/html, Size: 6736 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915: Be more gentle when exiting non-persistent contexts (rev3)
  2022-02-24 13:03 [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin
  2022-02-24 23:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Be more gentle when exiting non-persistent contexts (rev3) Patchwork
  2022-02-25  0:05 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2022-02-25 15:29 ` Patchwork
  2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2022-02-25 15:29 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 30290 bytes --]

== Series Details ==

Series: drm/i915: Be more gentle when exiting non-persistent contexts (rev3)
URL   : https://patchwork.freedesktop.org/series/93420/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11283_full -> Patchwork_22399_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_22399_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_22399_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_22399_full:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_pm_rpm@modeset-non-lpsp-stress:
    - shard-kbl:          NOTRUN -> [TIMEOUT][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl7/igt@i915_pm_rpm@modeset-non-lpsp-stress.html
    - shard-apl:          NOTRUN -> [TIMEOUT][2]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/igt@i915_pm_rpm@modeset-non-lpsp-stress.html

  
Known issues
------------

  Here are the changes found in Patchwork_22399_full that come from known issues:

### CI changes ###

#### Possible fixes ####

  * boot:
    - shard-apl:          ([PASS][3], [PASS][4], [PASS][5], [PASS][6], [PASS][7], [PASS][8], [PASS][9], [PASS][10], [PASS][11], [FAIL][12], [PASS][13], [PASS][14], [PASS][15], [PASS][16], [PASS][17], [PASS][18], [PASS][19], [PASS][20], [PASS][21], [PASS][22], [PASS][23], [PASS][24], [PASS][25], [PASS][26], [PASS][27]) ([i915#4386]) -> ([PASS][28], [PASS][29], [PASS][30], [PASS][31], [PASS][32], [PASS][33], [PASS][34], [PASS][35], [PASS][36], [PASS][37], [PASS][38], [PASS][39], [PASS][40], [PASS][41], [PASS][42], [PASS][43], [PASS][44], [PASS][45], [PASS][46], [PASS][47], [PASS][48], [PASS][49], [PASS][50], [PASS][51], [PASS][52])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl3/boot.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl4/boot.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl3/boot.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl3/boot.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl3/boot.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl3/boot.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl2/boot.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl1/boot.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl1/boot.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl1/boot.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl1/boot.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl2/boot.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl2/boot.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl8/boot.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl8/boot.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl8/boot.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl8/boot.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl7/boot.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl7/boot.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl7/boot.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl7/boot.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl6/boot.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl6/boot.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl4/boot.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl4/boot.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/boot.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl1/boot.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl1/boot.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl1/boot.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl2/boot.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl2/boot.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl2/boot.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl3/boot.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl3/boot.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/boot.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl3/boot.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl4/boot.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/boot.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/boot.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl4/boot.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/boot.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/boot.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl7/boot.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl7/boot.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl4/boot.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl7/boot.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl7/boot.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/boot.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/boot.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/boot.html

  

### IGT changes ###

#### Issues hit ####

  * igt@feature_discovery@display-4x:
    - shard-iclb:         NOTRUN -> [SKIP][53] ([i915#1839])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@feature_discovery@display-4x.html

  * igt@gem_eio@in-flight-contexts-immediate:
    - shard-iclb:         [PASS][54] -> [TIMEOUT][55] ([i915#3070])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-iclb2/igt@gem_eio@in-flight-contexts-immediate.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb5/igt@gem_eio@in-flight-contexts-immediate.html

  * igt@gem_eio@kms:
    - shard-tglb:         [PASS][56] -> [FAIL][57] ([i915#232])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-tglb6/igt@gem_eio@kms.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb5/igt@gem_eio@kms.html

  * igt@gem_exec_balancer@parallel-balancer:
    - shard-iclb:         [PASS][58] -> [SKIP][59] ([i915#4525])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-iclb4/igt@gem_exec_balancer@parallel-balancer.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@gem_exec_balancer@parallel-balancer.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-kbl:          [PASS][60] -> [FAIL][61] ([i915#2846])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-kbl4/igt@gem_exec_fair@basic-deadline.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl6/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-none-share@rcs0:
    - shard-iclb:         [PASS][62] -> [FAIL][63] ([i915#2842])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-iclb2/igt@gem_exec_fair@basic-none-share@rcs0.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb5/igt@gem_exec_fair@basic-none-share@rcs0.html

  * igt@gem_exec_fair@basic-none-solo@rcs0:
    - shard-apl:          [PASS][64] -> [FAIL][65] ([i915#2842])
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl3/igt@gem_exec_fair@basic-none-solo@rcs0.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/igt@gem_exec_fair@basic-none-solo@rcs0.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-kbl:          NOTRUN -> [FAIL][66] ([i915#2842]) +1 similar issue
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl7/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_exec_fair@basic-none@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][67] ([i915#2842])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb4/igt@gem_exec_fair@basic-none@vcs1.html

  * igt@gem_exec_suspend@basic-s3@smem:
    - shard-apl:          [PASS][68] -> [DMESG-WARN][69] ([i915#180]) +2 similar issues
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-apl8/igt@gem_exec_suspend@basic-s3@smem.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl2/igt@gem_exec_suspend@basic-s3@smem.html

  * igt@gem_huc_copy@huc-copy:
    - shard-apl:          NOTRUN -> [SKIP][70] ([fdo#109271] / [i915#2190])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl4/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@parallel-multi:
    - shard-skl:          NOTRUN -> [SKIP][71] ([fdo#109271] / [i915#4613])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl6/igt@gem_lmem_swapping@parallel-multi.html

  * igt@gem_lmem_swapping@parallel-random:
    - shard-kbl:          NOTRUN -> [SKIP][72] ([fdo#109271] / [i915#4613]) +1 similar issue
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@gem_lmem_swapping@parallel-random.html

  * igt@gem_lmem_swapping@parallel-random-verify:
    - shard-iclb:         NOTRUN -> [SKIP][73] ([i915#4613]) +1 similar issue
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb3/igt@gem_lmem_swapping@parallel-random-verify.html

  * igt@gem_lmem_swapping@random:
    - shard-apl:          NOTRUN -> [SKIP][74] ([fdo#109271] / [i915#4613]) +1 similar issue
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/igt@gem_lmem_swapping@random.html

  * igt@gem_pread@exhaustion:
    - shard-kbl:          NOTRUN -> [WARN][75] ([i915#2658])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@gem_pread@exhaustion.html

  * igt@gem_pxp@create-protected-buffer:
    - shard-iclb:         NOTRUN -> [SKIP][76] ([i915#4270]) +3 similar issues
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb3/igt@gem_pxp@create-protected-buffer.html

  * igt@gem_render_copy@y-tiled-mc-ccs-to-y-tiled-ccs:
    - shard-iclb:         NOTRUN -> [SKIP][77] ([i915#768]) +2 similar issues
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@gem_render_copy@y-tiled-mc-ccs-to-y-tiled-ccs.html

  * igt@gem_userptr_blits@dmabuf-sync:
    - shard-iclb:         NOTRUN -> [SKIP][78] ([i915#3323])
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@gem_userptr_blits@dmabuf-sync.html

  * igt@gem_userptr_blits@dmabuf-unsync:
    - shard-iclb:         NOTRUN -> [SKIP][79] ([i915#3297])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb5/igt@gem_userptr_blits@dmabuf-unsync.html

  * igt@gem_userptr_blits@input-checking:
    - shard-apl:          NOTRUN -> [DMESG-WARN][80] ([i915#4991])
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@gem_userptr_blits@input-checking.html

  * igt@gen9_exec_parse@unaligned-jump:
    - shard-iclb:         NOTRUN -> [SKIP][81] ([i915#2856]) +1 similar issue
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@gen9_exec_parse@unaligned-jump.html

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-apl:          NOTRUN -> [SKIP][82] ([fdo#109271] / [i915#658]) +1 similar issue
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_pm_rc6_residency@rc6-fence:
    - shard-iclb:         NOTRUN -> [WARN][83] ([i915#1804] / [i915#2684])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@i915_pm_rc6_residency@rc6-fence.html

  * igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait:
    - shard-iclb:         NOTRUN -> [SKIP][84] ([fdo#110892])
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb3/igt@i915_pm_rpm@modeset-non-lpsp-stress-no-wait.html

  * igt@i915_pm_rpm@modeset-pc8-residency-stress:
    - shard-iclb:         NOTRUN -> [SKIP][85] ([fdo#109293] / [fdo#109506])
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@i915_pm_rpm@modeset-pc8-residency-stress.html

  * igt@kms_big_fb@x-tiled-8bpp-rotate-270:
    - shard-iclb:         NOTRUN -> [SKIP][86] ([fdo#110725] / [fdo#111614]) +2 similar issues
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@kms_big_fb@x-tiled-8bpp-rotate-270.html

  * igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-180-hflip:
    - shard-apl:          NOTRUN -> [SKIP][87] ([fdo#109271] / [i915#3777]) +3 similar issues
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl4/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-180-hflip.html

  * igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-0-hflip-async-flip:
    - shard-kbl:          NOTRUN -> [SKIP][88] ([fdo#109271] / [i915#3777]) +1 similar issue
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-0-hflip-async-flip.html

  * igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-180-hflip-async-flip:
    - shard-skl:          NOTRUN -> [SKIP][89] ([fdo#109271] / [i915#3777]) +1 similar issue
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl6/igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-180-hflip-async-flip.html

  * igt@kms_big_fb@yf-tiled-8bpp-rotate-0:
    - shard-tglb:         NOTRUN -> [SKIP][90] ([fdo#111615])
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb1/igt@kms_big_fb@yf-tiled-8bpp-rotate-0.html

  * igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-180-async-flip:
    - shard-iclb:         NOTRUN -> [SKIP][91] ([fdo#110723]) +1 similar issue
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-180-async-flip.html

  * igt@kms_ccs@pipe-a-bad-rotation-90-y_tiled_gen12_mc_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][92] ([i915#3689] / [i915#3886])
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb7/igt@kms_ccs@pipe-a-bad-rotation-90-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_rc_ccs_cc:
    - shard-kbl:          NOTRUN -> [SKIP][93] ([fdo#109271] / [i915#3886]) +3 similar issues
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@kms_ccs@pipe-a-crc-sprite-planes-basic-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_ccs@pipe-a-random-ccs-data-y_tiled_gen12_rc_ccs_cc:
    - shard-iclb:         NOTRUN -> [SKIP][94] ([fdo#109278] / [i915#3886]) +5 similar issues
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_ccs@pipe-a-random-ccs-data-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_ccs@pipe-b-ccs-on-another-bo-y_tiled_gen12_mc_ccs:
    - shard-apl:          NOTRUN -> [SKIP][95] ([fdo#109271] / [i915#3886]) +8 similar issues
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@kms_ccs@pipe-b-ccs-on-another-bo-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-b-random-ccs-data-y_tiled_gen12_mc_ccs:
    - shard-skl:          NOTRUN -> [SKIP][96] ([fdo#109271] / [i915#3886]) +1 similar issue
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl7/igt@kms_ccs@pipe-b-random-ccs-data-y_tiled_gen12_mc_ccs.html

  * igt@kms_ccs@pipe-c-bad-rotation-90-y_tiled_ccs:
    - shard-tglb:         NOTRUN -> [SKIP][97] ([i915#3689])
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb1/igt@kms_ccs@pipe-c-bad-rotation-90-y_tiled_ccs.html

  * igt@kms_ccs@pipe-d-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc:
    - shard-kbl:          NOTRUN -> [SKIP][98] ([fdo#109271]) +94 similar issues
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl6/igt@kms_ccs@pipe-d-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc.html

  * igt@kms_cdclk@mode-transition:
    - shard-iclb:         NOTRUN -> [SKIP][99] ([i915#3742])
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@kms_cdclk@mode-transition.html

  * igt@kms_chamelium@dp-crc-multiple:
    - shard-skl:          NOTRUN -> [SKIP][100] ([fdo#109271] / [fdo#111827]) +4 similar issues
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl6/igt@kms_chamelium@dp-crc-multiple.html

  * igt@kms_chamelium@dp-frame-dump:
    - shard-tglb:         NOTRUN -> [SKIP][101] ([fdo#109284] / [fdo#111827])
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb7/igt@kms_chamelium@dp-frame-dump.html

  * igt@kms_chamelium@hdmi-hpd-enable-disable-mode:
    - shard-iclb:         NOTRUN -> [SKIP][102] ([fdo#109284] / [fdo#111827]) +6 similar issues
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@kms_chamelium@hdmi-hpd-enable-disable-mode.html

  * igt@kms_chamelium@hdmi-hpd-storm:
    - shard-kbl:          NOTRUN -> [SKIP][103] ([fdo#109271] / [fdo#111827]) +5 similar issues
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl7/igt@kms_chamelium@hdmi-hpd-storm.html

  * igt@kms_color_chamelium@pipe-b-ctm-0-5:
    - shard-apl:          NOTRUN -> [SKIP][104] ([fdo#109271] / [fdo#111827]) +12 similar issues
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@kms_color_chamelium@pipe-b-ctm-0-5.html

  * igt@kms_color_chamelium@pipe-d-ctm-0-25:
    - shard-iclb:         NOTRUN -> [SKIP][105] ([fdo#109278] / [fdo#109284] / [fdo#111827]) +2 similar issues
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_color_chamelium@pipe-d-ctm-0-25.html

  * igt@kms_content_protection@lic:
    - shard-apl:          NOTRUN -> [TIMEOUT][106] ([i915#1319]) +1 similar issue
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/igt@kms_content_protection@lic.html
    - shard-kbl:          NOTRUN -> [TIMEOUT][107] ([i915#1319])
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl7/igt@kms_content_protection@lic.html

  * igt@kms_content_protection@uevent:
    - shard-iclb:         NOTRUN -> [SKIP][108] ([fdo#109300] / [fdo#111066])
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_content_protection@uevent.html

  * igt@kms_cursor_crc@pipe-a-cursor-512x512-sliding:
    - shard-iclb:         NOTRUN -> [SKIP][109] ([fdo#109278] / [fdo#109279]) +3 similar issues
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_cursor_crc@pipe-a-cursor-512x512-sliding.html

  * igt@kms_cursor_crc@pipe-b-cursor-32x32-offscreen:
    - shard-tglb:         NOTRUN -> [SKIP][110] ([i915#3319])
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb7/igt@kms_cursor_crc@pipe-b-cursor-32x32-offscreen.html

  * igt@kms_cursor_crc@pipe-b-cursor-512x170-rapid-movement:
    - shard-tglb:         NOTRUN -> [SKIP][111] ([i915#3359])
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb6/igt@kms_cursor_crc@pipe-b-cursor-512x170-rapid-movement.html

  * igt@kms_cursor_crc@pipe-c-cursor-512x512-rapid-movement:
    - shard-tglb:         NOTRUN -> [SKIP][112] ([fdo#109279] / [i915#3359]) +1 similar issue
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb1/igt@kms_cursor_crc@pipe-c-cursor-512x512-rapid-movement.html

  * igt@kms_cursor_crc@pipe-d-cursor-32x32-rapid-movement:
    - shard-iclb:         NOTRUN -> [SKIP][113] ([fdo#109278]) +27 similar issues
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@kms_cursor_crc@pipe-d-cursor-32x32-rapid-movement.html

  * igt@kms_cursor_legacy@cursorb-vs-flipb-atomic-transitions-varying-size:
    - shard-iclb:         NOTRUN -> [SKIP][114] ([fdo#109274] / [fdo#109278]) +4 similar issues
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_cursor_legacy@cursorb-vs-flipb-atomic-transitions-varying-size.html

  * igt@kms_dp_tiled_display@basic-test-pattern-with-chamelium:
    - shard-iclb:         NOTRUN -> [SKIP][115] ([i915#3528])
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_dp_tiled_display@basic-test-pattern-with-chamelium.html

  * igt@kms_flip@2x-flip-vs-rmfb-interruptible:
    - shard-iclb:         NOTRUN -> [SKIP][116] ([fdo#109274]) +3 similar issues
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb3/igt@kms_flip@2x-flip-vs-rmfb-interruptible.html

  * igt@kms_flip@2x-single-buffer-flip-vs-dpms-off-vs-modeset-interruptible:
    - shard-tglb:         NOTRUN -> [SKIP][117] ([fdo#109274] / [fdo#111825])
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb7/igt@kms_flip@2x-single-buffer-flip-vs-dpms-off-vs-modeset-interruptible.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@b-dp1:
    - shard-kbl:          NOTRUN -> [FAIL][118] ([i915#79])
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl6/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-dp1.html

  * igt@kms_flip@flip-vs-expired-vblank@a-edp1:
    - shard-skl:          [PASS][119] -> [FAIL][120] ([i915#2122])
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-skl10/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl9/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html

  * igt@kms_flip@flip-vs-expired-vblank@c-hdmi-a2:
    - shard-glk:          [PASS][121] -> [FAIL][122] ([i915#2122])
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-glk3/igt@kms_flip@flip-vs-expired-vblank@c-hdmi-a2.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-glk5/igt@kms_flip@flip-vs-expired-vblank@c-hdmi-a2.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling:
    - shard-iclb:         [PASS][123] -> [SKIP][124] ([i915#3701])
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-iclb7/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-64bpp-ytile-downscaling.html

  * igt@kms_frontbuffer_tracking@fbc-2p-pri-indfb-multidraw:
    - shard-iclb:         NOTRUN -> [SKIP][125] ([fdo#109280]) +23 similar issues
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-2p-pri-indfb-multidraw.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-pwrite:
    - shard-tglb:         NOTRUN -> [SKIP][126] ([fdo#109280] / [fdo#111825]) +2 similar issues
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb7/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-cur-indfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc:
    - shard-apl:          NOTRUN -> [SKIP][127] ([fdo#109271]) +147 similar issues
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-spr-indfb-draw-mmap-wc.html

  * igt@kms_frontbuffer_tracking@psr-2p-scndscrn-spr-indfb-draw-mmap-cpu:
    - shard-skl:          NOTRUN -> [SKIP][128] ([fdo#109271]) +43 similar issues
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl6/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-spr-indfb-draw-mmap-cpu.html

  * igt@kms_hdr@static-toggle:
    - shard-iclb:         NOTRUN -> [SKIP][129] ([i915#1187])
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@kms_hdr@static-toggle.html

  * igt@kms_pipe_b_c_ivb@disable-pipe-b-enable-pipe-c:
    - shard-iclb:         NOTRUN -> [SKIP][130] ([fdo#109289]) +2 similar issues
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_pipe_b_c_ivb@disable-pipe-b-enable-pipe-c.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-7efc:
    - shard-kbl:          NOTRUN -> [FAIL][131] ([fdo#108145] / [i915#265])
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@kms_plane_alpha_blend@pipe-b-alpha-7efc.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-apl:          NOTRUN -> [FAIL][132] ([fdo#108145] / [i915#265]) +1 similar issue
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          NOTRUN -> [FAIL][133] ([fdo#108145] / [i915#265])
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl7/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_plane_lowres@pipe-d-tiling-y:
    - shard-tglb:         NOTRUN -> [SKIP][134] ([i915#3536])
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-tglb7/igt@kms_plane_lowres@pipe-d-tiling-y.html

  * igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping:
    - shard-apl:          NOTRUN -> [SKIP][135] ([fdo#109271] / [i915#2733])
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl6/igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping.html
    - shard-kbl:          NOTRUN -> [SKIP][136] ([fdo#109271] / [i915#2733])
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl7/igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area:
    - shard-kbl:          NOTRUN -> [SKIP][137] ([fdo#109271] / [i915#658])
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area.html

  * igt@kms_psr2_su@page_flip-nv12:
    - shard-skl:          NOTRUN -> [SKIP][138] ([fdo#109271] / [i915#658])
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl6/igt@kms_psr2_su@page_flip-nv12.html

  * igt@kms_psr2_su@page_flip-xrgb8888:
    - shard-iclb:         NOTRUN -> [SKIP][139] ([fdo#109642] / [fdo#111068] / [i915#658])
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@kms_psr2_su@page_flip-xrgb8888.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         NOTRUN -> [SKIP][140] ([fdo#109441]) +1 similar issue
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_psr@psr2_suspend:
    - shard-iclb:         [PASS][141] -> [SKIP][142] ([fdo#109441]) +1 similar issue
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-iclb2/igt@kms_psr@psr2_suspend.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb5/igt@kms_psr@psr2_suspend.html

  * igt@kms_scaling_modes@scaling-mode-none@edp-1-pipe-c:
    - shard-iclb:         NOTRUN -> [SKIP][143] ([i915#5030]) +2 similar issues
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb3/igt@kms_scaling_modes@scaling-mode-none@edp-1-pipe-c.html

  * igt@kms_vblank@pipe-d-wait-idle:
    - shard-kbl:          NOTRUN -> [SKIP][144] ([fdo#109271] / [i915#533]) +1 similar issue
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl3/igt@kms_vblank@pipe-d-wait-idle.html

  * igt@kms_vrr@flip-suspend:
    - shard-iclb:         NOTRUN -> [SKIP][145] ([fdo#109502]) +1 similar issue
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb3/igt@kms_vrr@flip-suspend.html

  * igt@kms_writeback@writeback-fb-id:
    - shard-apl:          NOTRUN -> [SKIP][146] ([fdo#109271] / [i915#2437])
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-apl8/igt@kms_writeback@writeback-fb-id.html

  * igt@nouveau_crc@pipe-a-source-rg:
    - shard-iclb:         NOTRUN -> [SKIP][147] ([i915#2530]) +1 similar issue
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb6/igt@nouveau_crc@pipe-a-source-rg.html

  * igt@perf@polling:
    - shard-skl:          NOTRUN -> [FAIL][148] ([i915#1542])
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-skl6/igt@perf@polling.html

  * igt@perf_pmu@rc6-suspend:
    - shard-kbl:          [PASS][149] -> [DMESG-WARN][150] ([i915#180]) +2 similar issues
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11283/shard-kbl7/igt@perf_pmu@rc6-suspend.html
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-kbl1/igt@perf_pmu@rc6-suspend.html

  * igt@prime_nv_api@i915_nv_import_twice_check_flink_name:
    - shard-iclb:         NOTRUN -> [SKIP][151] ([fdo#109291]) +2 similar issues
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/shard-iclb7/igt@prime_nv_api@i915_nv_import_twice_check_flink_name.html

  * igt@prime_nv_pcopy@test1_micro:
    - shard-tglb:         NOTRUN ->

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_22399/index.html

[-- Attachment #2: Type: text/html, Size: 33898 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts
  2021-08-26 10:52 [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin
@ 2021-08-26 13:32 ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2021-08-26 13:32 UTC (permalink / raw)
  To: Tvrtko Ursulin
  Cc: Intel-gfx, dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han,
	Matthew Brost

On Thu, Aug 26, 2021 at 11:52:14AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> When a non-persistent context exits we currently mark it as banned in
> order to trigger fast termination of any outstanding GPU jobs it may have
> left running.
> 
> In doing so we apply a very strict 1ms limit in which the left over job
> has to preempt before we issue an engine reset.
> 
> Some workloads are not able to cleanly preempt in that time window and it
> can be argued that it would instead be better to give them a bit more
> grace since avoiding engine resets is generally preferable.
> 
> To achieve this the patch splits handling of banned contexts from simply
> closed non-persistent ones and then applies different timeouts for both
> and also extends the criteria which determines if a request should be
> scheduled back in after preemption or not.
> 
> A 20ms preempt timeout grace is given to exited non-persistent contexts,
> which has been empirically tested to satisfy customers' requirements
> and still provides reasonably quick cleanup post exit.
> 
> v2:
>  * Streamline fast path checks.
> 
> v3:
>  * Simplify by using only schedulable status.
>  * Increase timeout to 20ms.
> 
> v4:
>  * Fix live_execlists selftest.
> 
> v5:
>  * Fix logic in kill_engines.
> 
> v6:
>  * Rebase.
> 
> v7:
>  * Add GuC support.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhen Han <zhen.han@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +++++++++++-----
>  drivers/gpu/drm/i915/gt/intel_context.c       | 25 ++++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_context.h       | 26 ++++++++++++++-----
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  3 ++-
>  .../drm/i915/gt/intel_execlists_submission.c  | 13 +++++++---
>  .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++++++----
>  drivers/gpu/drm/i915/i915_request.c           |  2 +-
>  8 files changed, 84 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index fd169cf2f75a..6ae803cb4de3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -1072,7 +1072,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>  	return engine;
>  }
>  
> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
> +static void
> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>  {
>  	struct i915_gem_engines_iter it;
>  	struct intel_context *ce;
> @@ -1086,8 +1087,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  	 */
>  	for_each_gem_engine(ce, engines, it) {
>  		struct intel_engine_cs *engine;
> +		bool skip = false;
>  
> -		if (ban && intel_context_ban(ce, NULL))
> +		if (ban)
> +			skip = intel_context_ban(ce, NULL);
> +		else if (!persistent)
> +			skip = intel_context_exit_nonpersistent(ce, NULL);
> +
> +		/* Already banned or non-persistent closed. */
> +		if (skip)
>  			continue;
>  
>  		/*
> @@ -1100,7 +1108,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  		engine = active_engine(ce);
>  
>  		/* First attempt to gracefully cancel the context */
> -		if (engine && !__cancel_engine(engine) && ban)
> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>  			/*
>  			 * If we are unable to send a preemptive pulse to bump
>  			 * the context from the GPU, we have to resort to a full
> @@ -1112,8 +1120,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  
>  static void kill_context(struct i915_gem_context *ctx)
>  {
> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
> -		    !ctx->i915->params.enable_hangcheck);
>  	struct i915_gem_engines *pos, *next;
>  
>  	spin_lock_irq(&ctx->stale.lock);
> @@ -1126,7 +1132,8 @@ static void kill_context(struct i915_gem_context *ctx)
>  
>  		spin_unlock_irq(&ctx->stale.lock);
>  
> -		kill_engines(pos, ban);
> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  		spin_lock_irq(&ctx->stale.lock);
>  		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
> @@ -1172,7 +1179,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>  
>  kill:
>  	if (list_empty(&engines->link)) /* raced, already closed */
> -		kill_engines(engines, true);
> +		kill_engines(engines, true,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  	i915_sw_fence_commit(&engines->fence);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 745e84c72c90..b9880ffe5da7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -533,6 +533,31 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
>  	return active;
>  }
>  
> +bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
> +{
> +	bool ret = intel_context_set_banned(ce);
> +
> +	trace_intel_context_ban(ce);
> +
> +	if (ce->ops->revoke)
> +		ce->ops->revoke(ce, rq,
> +				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);
> +
> +	return ret;
> +}
> +
> +bool intel_context_exit_nonpersistent(struct intel_context *ce,
> +				      struct i915_request *rq)
> +{
> +	bool ret = intel_context_set_exiting(ce);
> +
> +	if (ce->ops->revoke)
> +		ce->ops->revoke(ce, rq,
> +				INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS);
> +
> +	return ret;
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftest_context.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index c41098950746..8b77f3e7449a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -25,6 +25,9 @@
>  		     ##__VA_ARGS__);					\
>  } while (0)
>  
> +#define INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS (20)
> +#define INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS  (1)
> +
>  struct i915_gem_ww_ctx;
>  
>  void intel_context_init(struct intel_context *ce,
> @@ -254,18 +257,27 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
>  	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>  }
>  
> -static inline bool intel_context_ban(struct intel_context *ce,
> -				     struct i915_request *rq)
> +bool intel_context_ban(struct intel_context *ce, struct i915_request *rq);
> +
> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
>  {
> -	bool ret = intel_context_set_banned(ce);
> +	return !test_bit(CONTEXT_EXITING, &ce->flags) &&
> +	       !test_bit(CONTEXT_BANNED, &ce->flags);
> +}
>  
> -	trace_intel_context_ban(ce);
> -	if (ce->ops->ban)
> -		ce->ops->ban(ce, rq);
> +static inline bool intel_context_is_exiting(const struct intel_context *ce)
> +{
> +	return test_bit(CONTEXT_EXITING, &ce->flags);
> +}
>  
> -	return ret;
> +static inline bool intel_context_set_exiting(struct intel_context *ce)
> +{
> +	return test_and_set_bit(CONTEXT_EXITING, &ce->flags);

Uh more unordered atomics without barriers and without comments explaining
the barriers and pointing at the counter-party barrier. This needs to be
fixed.

In general it would be really good if all the context status bits were at
least properly documented. As-is this is an extremely challenging maze,
and I'm not sure it's really correct in all cases.
-Daniel

>  }
>  
> +bool intel_context_exit_nonpersistent(struct intel_context *ce,
> +				      struct i915_request *rq);
> +
>  static inline bool
>  intel_context_force_single_submission(const struct intel_context *ce)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index e54351a170e2..4a0a1e736972 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -37,7 +37,7 @@ struct intel_context_ops {
>  
>  	int (*alloc)(struct intel_context *ce);
>  
> -	void (*ban)(struct intel_context *ce, struct i915_request *rq);
> +	void (*revoke)(struct intel_context *ce, struct i915_request *rq, unsigned int preempt_timeout_ms);
>  
>  	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
>  	int (*pin)(struct intel_context *ce, void *vaddr);
> @@ -112,6 +112,7 @@ struct intel_context {
>  #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>  #define CONTEXT_NOPREEMPT		8
>  #define CONTEXT_LRCA_DIRTY		9
> +#define CONTEXT_EXITING			10
>  
>  	struct {
>  		u64 timeout_us;
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index de5f9c86b9a4..43823b9d4013 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>  		     !intel_engine_has_heartbeat(engine)))
>  		intel_context_set_banned(ce);
>  
> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>  		reset_active(rq, engine);
>  
>  	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>  static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>  					    const struct i915_request *rq)
>  {
> +	struct intel_context *ce;
> +
>  	if (!rq)
>  		return 0;
>  
> +	ce = rq->context;
> +
>  	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
> -		return 1;
> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> +		return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
> +	else if (unlikely(intel_context_is_exiting(ce)))
> +		return INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS;
>  
>  	return READ_ONCE(engine->props.preempt_timeout_ms);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 3c65efcb7bed..a28862bbe51e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -587,8 +587,9 @@ static void ring_context_reset(struct intel_context *ce)
>  	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
>  }
>  
> -static void ring_context_ban(struct intel_context *ce,
> -			     struct i915_request *rq)
> +static void ring_context_revoke(struct intel_context *ce,
> +				struct i915_request *rq,
> +				unsigned int preempt_timeout_ms)
>  {
>  	struct intel_engine_cs *engine;
>  
> @@ -623,7 +624,7 @@ static const struct intel_context_ops ring_context_ops = {
>  
>  	.cancel_request = ring_context_cancel_request,
>  
> -	.ban = ring_context_ban,
> +	.revoke = ring_context_revoke,
>  
>  	.pre_pin = ring_context_pre_pin,
>  	.pin = ring_context_pin,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 87d8dc8f51b9..b0a8629b26e3 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1618,7 +1618,9 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
>  	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
>  }
>  
> -static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
> +static void
> +guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
> +		   unsigned int preempt_timeout_ms)
>  {
>  	struct intel_guc *guc = ce_to_guc(ce);
>  	struct intel_runtime_pm *runtime_pm =
> @@ -1655,7 +1657,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
>  		 * gets kicked off the HW ASAP.
>  		 */
>  		with_intel_runtime_pm(runtime_pm, wakeref) {
> -			__guc_context_set_preemption_timeout(guc, guc_id, 1);
> +			__guc_context_set_preemption_timeout(guc, guc_id,
> +							     preempt_timeout_ms);
>  			__guc_context_sched_disable(guc, ce, guc_id);
>  		}
>  	} else {
> @@ -1663,7 +1666,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
>  			with_intel_runtime_pm(runtime_pm, wakeref)
>  				__guc_context_set_preemption_timeout(guc,
>  								     ce->guc_id,
> -								     1);
> +								     preempt_timeout_ms);
>  		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>  	}
>  }
> @@ -1979,7 +1982,7 @@ static const struct intel_context_ops guc_context_ops = {
>  	.unpin = guc_context_unpin,
>  	.post_unpin = guc_context_post_unpin,
>  
> -	.ban = guc_context_ban,
> +	.revoke = guc_context_revoke,
>  
>  	.cancel_request = guc_context_cancel_request,
>  
> @@ -2176,7 +2179,7 @@ static const struct intel_context_ops virtual_guc_context_ops = {
>  	.unpin = guc_context_unpin,
>  	.post_unpin = guc_context_post_unpin,
>  
> -	.ban = guc_context_ban,
> +	.revoke = guc_context_revoke,
>  
>  	.cancel_request = guc_context_cancel_request,
>  
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index ce446716d092..b1a9bec83339 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request)
>  		goto active;
>  	}
>  
> -	if (unlikely(intel_context_is_banned(request->context)))
> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>  		i915_request_set_error_once(request, -EIO);
>  
>  	if (unlikely(fatal_error(request->fence.error)))
> -- 
> 2.30.2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts
@ 2021-08-26 10:52 Tvrtko Ursulin
  2021-08-26 13:32 ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2021-08-26 10:52 UTC (permalink / raw)
  To: Intel-gfx
  Cc: dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han, Matthew Brost

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the left over job
has to preempt before we issue an engine reset.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferable.

To achieve this the patch splits handling of banned contexts from simply
closed non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

A 20ms preempt timeout grace is given to exited non-persistent contexts,
which has been empirically tested to satisfy customers' requirements
and still provides reasonably quick cleanup post exit.

v2:
 * Streamline fast path checks.

v3:
 * Simplify by using only schedulable status.
 * Increase timeout to 20ms.

v4:
 * Fix live_execlists selftest.

v5:
 * Fix logic in kill_engines.

v6:
 * Rebase.

v7:
 * Add GuC support.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +++++++++++-----
 drivers/gpu/drm/i915/gt/intel_context.c       | 25 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context.h       | 26 ++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_context_types.h |  3 ++-
 .../drm/i915/gt/intel_execlists_submission.c  | 13 +++++++---
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  7 ++---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 13 ++++++----
 drivers/gpu/drm/i915/i915_request.c           |  2 +-
 8 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index fd169cf2f75a..6ae803cb4de3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1072,7 +1072,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1086,8 +1087,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool skip = false;
 
-		if (ban && intel_context_ban(ce, NULL))
+		if (ban)
+			skip = intel_context_ban(ce, NULL);
+		else if (!persistent)
+			skip = intel_context_exit_nonpersistent(ce, NULL);
+
+		/* Already banned or non-persistent closed. */
+		if (skip)
 			continue;
 
 		/*
@@ -1100,7 +1108,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -1112,8 +1120,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -1126,7 +1132,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -1172,7 +1179,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 745e84c72c90..b9880ffe5da7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -533,6 +533,31 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
 	return active;
 }
 
+bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
+{
+	bool ret = intel_context_set_banned(ce);
+
+	trace_intel_context_ban(ce);
+
+	if (ce->ops->revoke)
+		ce->ops->revoke(ce, rq,
+				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);
+
+	return ret;
+}
+
+bool intel_context_exit_nonpersistent(struct intel_context *ce,
+				      struct i915_request *rq)
+{
+	bool ret = intel_context_set_exiting(ce);
+
+	if (ce->ops->revoke)
+		ce->ops->revoke(ce, rq,
+				INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS);
+
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index c41098950746..8b77f3e7449a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -25,6 +25,9 @@
 		     ##__VA_ARGS__);					\
 } while (0)
 
+#define INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS (20)
+#define INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS  (1)
+
 struct i915_gem_ww_ctx;
 
 void intel_context_init(struct intel_context *ce,
@@ -254,18 +257,27 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
-static inline bool intel_context_ban(struct intel_context *ce,
-				     struct i915_request *rq)
+bool intel_context_ban(struct intel_context *ce, struct i915_request *rq);
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
 {
-	bool ret = intel_context_set_banned(ce);
+	return !test_bit(CONTEXT_EXITING, &ce->flags) &&
+	       !test_bit(CONTEXT_BANNED, &ce->flags);
+}
 
-	trace_intel_context_ban(ce);
-	if (ce->ops->ban)
-		ce->ops->ban(ce, rq);
+static inline bool intel_context_is_exiting(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_EXITING, &ce->flags);
+}
 
-	return ret;
+static inline bool intel_context_set_exiting(struct intel_context *ce)
+{
+	return test_and_set_bit(CONTEXT_EXITING, &ce->flags);
 }
 
+bool intel_context_exit_nonpersistent(struct intel_context *ce,
+				      struct i915_request *rq);
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2..4a0a1e736972 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -37,7 +37,7 @@ struct intel_context_ops {
 
 	int (*alloc)(struct intel_context *ce);
 
-	void (*ban)(struct intel_context *ce, struct i915_request *rq);
+	void (*revoke)(struct intel_context *ce, struct i915_request *rq, unsigned int preempt_timeout_ms);
 
 	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
 	int (*pin)(struct intel_context *ce, void *vaddr);
@@ -112,6 +112,7 @@ struct intel_context {
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
 #define CONTEXT_LRCA_DIRTY		9
+#define CONTEXT_EXITING			10
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a4..43823b9d4013 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
-		return 1;
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+		return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(intel_context_is_exiting(ce)))
+		return INTEL_CONTEXT_EXITING_PREEMPT_TIMEOUT_MS;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 3c65efcb7bed..a28862bbe51e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -587,8 +587,9 @@ static void ring_context_reset(struct intel_context *ce)
 	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
 }
 
-static void ring_context_ban(struct intel_context *ce,
-			     struct i915_request *rq)
+static void ring_context_revoke(struct intel_context *ce,
+				struct i915_request *rq,
+				unsigned int preempt_timeout_ms)
 {
 	struct intel_engine_cs *engine;
 
@@ -623,7 +624,7 @@ static const struct intel_context_ops ring_context_ops = {
 
 	.cancel_request = ring_context_cancel_request,
 
-	.ban = ring_context_ban,
+	.revoke = ring_context_revoke,
 
 	.pre_pin = ring_context_pre_pin,
 	.pin = ring_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 87d8dc8f51b9..b0a8629b26e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1618,7 +1618,9 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
 	intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
 }
 
-static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
+static void
+guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
+		   unsigned int preempt_timeout_ms)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 	struct intel_runtime_pm *runtime_pm =
@@ -1655,7 +1657,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
 		 * gets kicked off the HW ASAP.
 		 */
 		with_intel_runtime_pm(runtime_pm, wakeref) {
-			__guc_context_set_preemption_timeout(guc, guc_id, 1);
+			__guc_context_set_preemption_timeout(guc, guc_id,
+							     preempt_timeout_ms);
 			__guc_context_sched_disable(guc, ce, guc_id);
 		}
 	} else {
@@ -1663,7 +1666,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
 			with_intel_runtime_pm(runtime_pm, wakeref)
 				__guc_context_set_preemption_timeout(guc,
 								     ce->guc_id,
-								     1);
+								     preempt_timeout_ms);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 	}
 }
@@ -1979,7 +1982,7 @@ static const struct intel_context_ops guc_context_ops = {
 	.unpin = guc_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
@@ -2176,7 +2179,7 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 	.unpin = guc_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ce446716d092..b1a9bec83339 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts
  2021-08-05 16:32 ` Matthew Brost
@ 2021-08-06 10:44   ` Tvrtko Ursulin
  0 siblings, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2021-08-06 10:44 UTC (permalink / raw)
  To: Matthew Brost
  Cc: Intel-gfx, dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han


On 05/08/2021 17:32, Matthew Brost wrote:
> On Thu, Aug 05, 2021 at 01:05:09PM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> When a non-persistent context exits we currently mark it as banned in
>> order to trigger fast termination of any outstanding GPU jobs it may have
>> left running.
>>
>> In doing so we apply a very strict 1ms limit in which the left over job
>> has to preempt before we issue an engine reset.
>>
>> Some workloads are not able to cleanly preempt in that time window and it
>> can be argued that it would instead be better to give them a bit more
>> grace since avoiding engine resets is generally preferable.
>>
>> To achieve this the patch splits handling of banned contexts from simply
>> closed non-persistent ones and then applies different timeouts for both
>> and also extends the criteria which determines if a request should be
>> scheduled back in after preemption or not.
>>
>> A 20ms preempt timeout grace is given to exited non-persistent contexts,
>> which has been empirically tested to satisfy customers' requirements
>> and still provides reasonably quick cleanup post exit.
>>
> 
> I think you need to rework your thinking here a bit as this is a very
> execlists specific solution and the GuC needs to be considered.

Slipped my mind GuC patches were merged in the meantime. (This patch 
predates that.) But I think wording in the commit message is fine. It is 
just the implementation that now has to handle the GuC as well.

>> v2:
>>   * Streamline fast path checks.
>>
>> v3:
>>   * Simplify by using only schedulable status.
>>   * Increase timeout to 20ms.
>>
>> v4:
>>   * Fix live_execlists selftest.
>>
>> v5:
>>   * Fix logic in kill_engines.
>>
>> v6:
>>   * Rebase.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Zhen Han <zhen.han@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +++++++++++++------
>>   drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
>>   drivers/gpu/drm/i915/gt/intel_context.h       | 17 +++++++++++++-
>>   drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
>>   .../drm/i915/gt/intel_execlists_submission.c  | 11 ++++++++--
>>   drivers/gpu/drm/i915/gt/selftest_execlists.c  | 20 +++++++++++------
>>   drivers/gpu/drm/i915/i915_request.c           |  2 +-
>>   7 files changed, 57 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index cff72679ad7c..21fe5d4057ab 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>>   	return engine;
>>   }
>>   
>> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
>> +static void
>> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>>   {
>>   	struct i915_gem_engines_iter it;
>>   	struct intel_context *ce;
>> @@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   	 */
>>   	for_each_gem_engine(ce, engines, it) {
>>   		struct intel_engine_cs *engine;
>> +		bool skip = false;
>> +
>> +		if (ban)
>> +			skip = intel_context_ban(ce, NULL);
>> +		else if (!persistent)
>> +			skip = !intel_context_clear_schedulable(ce);
> 
> schedulable doesn't hook into the backend at all, while
> intel_context_ban does. In the case of GuC submission intel_context_ban
> changes the preemption timeout to 1 us and disables scheduling resulting
> in the context getting kicked off the hardware immediately. You likely
> need to update intel_context_clear_schedulable to use the same vfunc as
> intel_context_ban() but accept an argument for the value of the
> preemption timeout. For a ban use a lower value, for clearing
> schedulable use a higher value.

Okay I'll have a look. Might go back to closed flag as opposed to 
schedulable as well since I don't quite like schedulable being the odd 
one out.

> 
>>   
>> -		if (ban && intel_context_ban(ce, NULL))
>> +		/* Already previously banned or made non-schedulable? */
>> +		if (skip)
>>   			continue;
>>   
>>   		/*
>> @@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   		engine = active_engine(ce);
>>   
>>   		/* First attempt to gracefully cancel the context */
>> -		if (engine && !__cancel_engine(engine) && ban)
>> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>>   			/*
>>   			 * If we are unable to send a preemptive pulse to bump
>>   			 * the context from the GPU, we have to resort to a full
>> @@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   
>>   static void kill_context(struct i915_gem_context *ctx)
>>   {
>> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
>> -		    !ctx->i915->params.enable_hangcheck);
>>   	struct i915_gem_engines *pos, *next;
>>   
>>   	spin_lock_irq(&ctx->stale.lock);
>> @@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx)
>>   
>>   		spin_unlock_irq(&ctx->stale.lock);
>>   
>> -		kill_engines(pos, ban);
>> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
>> +			     i915_gem_context_is_persistent(ctx));
>>   
>>   		spin_lock_irq(&ctx->stale.lock);
>>   		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
>> @@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>>   
>>   kill:
>>   	if (list_empty(&engines->link)) /* raced, already closed */
>> -		kill_engines(engines, true);
>> +		kill_engines(engines, true,
>> +			     i915_gem_context_is_persistent(ctx));
>>   
>>   	i915_sw_fence_commit(&engines->fence);
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> index 745e84c72c90..bc1701ef1578 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> @@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>>   	ce->ring = NULL;
>>   	ce->ring_size = SZ_4K;
>>   
>> +	__set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
>> +
>>   	ewma_runtime_init(&ce->runtime.avg);
>>   
>>   	ce->vm = i915_vm_get(engine->gt->vm);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>> index c41098950746..5b50716654dd 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>> @@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce)
>>   
>>   static inline bool intel_context_set_banned(struct intel_context *ce)
>>   {
>> -	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>> +	bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>> +
>> +	if (!banned)
>> +		clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
>> +
>> +	return banned;
>> +}
>> +
>> +static inline bool intel_context_clear_schedulable(struct intel_context *ce)
>> +{
>> +	return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
>> +}
>> +
>> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
>> +{
>> +	return test_bit(CONTEXT_SCHEDULABLE, &ce->flags);
>>   }
>>   
>>   static inline bool intel_context_ban(struct intel_context *ce,
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index e54351a170e2..3306c70c9c54 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -112,6 +112,7 @@ struct intel_context {
>>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>>   #define CONTEXT_NOPREEMPT		8
>>   #define CONTEXT_LRCA_DIRTY		9
>> +#define CONTEXT_SCHEDULABLE		10  /* Unless banned or non-persistent closed. */
>>   
>>   	struct {
>>   		u64 timeout_us;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index de5f9c86b9a4..778f3cda3c71 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>>   		     !intel_engine_has_heartbeat(engine)))
>>   		intel_context_set_banned(ce);
>>   
>> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>>   		reset_active(rq, engine);
>>   
>>   	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>> @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>>   static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>>   					    const struct i915_request *rq)
>>   {
>> +	struct intel_context *ce;
>> +
>>   	if (!rq)
>>   		return 0;
>>   
>> +	ce = rq->context;
>> +
>>   	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
>> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
>> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>>   		return 1;
>> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
>> +	else if (unlikely(!intel_context_is_schedulable(ce)))
>> +		return 20;
> 
> Likely want a define for '1' and '20' too.

Since the addition of GuC yeah, true.

Regards,

Tvrtko

> 
> Matt
> 
>>   
>>   	return READ_ONCE(engine->props.preempt_timeout_ms);
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
>> index f12ffe797639..da36c015caf4 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
>> @@ -2050,6 +2050,12 @@ struct live_preempt_cancel {
>>   	struct preempt_client a, b;
>>   };
>>   
>> +static void context_clear_banned(struct intel_context *ce)
>> +{
>> +	clear_bit(CONTEXT_BANNED, &ce->flags);
>> +	set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
>> +}
>> +
>>   static int __cancel_active0(struct live_preempt_cancel *arg)
>>   {
>>   	struct i915_request *rq;
>> @@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
>>   	if (IS_ERR(rq))
>>   		return PTR_ERR(rq);
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq->context->flags);
>> +	context_clear_banned(rq->context);
>>   	i915_request_get(rq);
>>   	i915_request_add(rq);
>>   	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
>> @@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
>>   	if (IS_ERR(rq[0]))
>>   		return PTR_ERR(rq[0]);
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
>> +	context_clear_banned(rq[0]->context);
>>   	i915_request_get(rq[0]);
>>   	i915_request_add(rq[0]);
>>   	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
>> @@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
>>   		goto out;
>>   	}
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
>> +	context_clear_banned(rq[1]->context);
>>   	i915_request_get(rq[1]);
>>   	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
>>   	i915_request_add(rq[1]);
>> @@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
>>   	if (IS_ERR(rq[0]))
>>   		return PTR_ERR(rq[0]);
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
>> +	context_clear_banned(rq[0]->context);
>>   	i915_request_get(rq[0]);
>>   	i915_request_add(rq[0]);
>>   	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
>> @@ -2197,7 +2203,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
>>   		goto out;
>>   	}
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
>> +	context_clear_banned(rq[1]->context);
>>   	i915_request_get(rq[1]);
>>   	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
>>   	i915_request_add(rq[1]);
>> @@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
>>   	if (IS_ERR(rq))
>>   		return PTR_ERR(rq);
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq->context->flags);
>> +	context_clear_banned(rq->context);
>>   	i915_request_get(rq);
>>   	i915_request_add(rq);
>>   	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
>> @@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
>>   	if (IS_ERR(rq))
>>   		return PTR_ERR(rq);
>>   
>> -	clear_bit(CONTEXT_BANNED, &rq->context->flags);
>> +	context_clear_banned(rq->context);
>>   	i915_request_get(rq);
>>   	i915_request_add(rq);
>>   	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index ce446716d092..b1a9bec83339 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request)
>>   		goto active;
>>   	}
>>   
>> -	if (unlikely(intel_context_is_banned(request->context)))
>> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>>   		i915_request_set_error_once(request, -EIO);
>>   
>>   	if (unlikely(fatal_error(request->fence.error)))
>> -- 
>> 2.30.2
>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts
  2021-08-05 12:05 Tvrtko Ursulin
@ 2021-08-05 16:32 ` Matthew Brost
  2021-08-06 10:44   ` Tvrtko Ursulin
  0 siblings, 1 reply; 9+ messages in thread
From: Matthew Brost @ 2021-08-05 16:32 UTC (permalink / raw)
  To: Tvrtko Ursulin
  Cc: Intel-gfx, dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han

On Thu, Aug 05, 2021 at 01:05:09PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> When a non-persistent context exits we currently mark it as banned in
> order to trigger fast termination of any outstanding GPU jobs it may have
> left running.
> 
> In doing so we apply a very strict 1ms limit in which the left over job
> has to preempt before we issues an engine resets.
> 
> Some workloads are not able to cleanly preempt in that time window and it
> can be argued that it would instead be better to give them a bit more
> grace since avoiding engine resets is generally preferrable.
> 
> To achieve this the patch splits handling of banned contexts from simply
> closed non-persistent ones and then applies different timeouts for both
> and also extends the criteria which determines if a request should be
> scheduled back in after preemption or not.
> 
> 15ms preempt timeout grace is given to exited non-persistent contexts
> which have been empirically tested to satisfy customers requirements
> and still provides reasonably quick cleanup post exit.
> 

I think you need to rework your thinking here a bit, as this is a very
execlists-specific solution and the GuC needs to be considered.

> v2:
>  * Streamline fast path checks.
> 
> v3:
>  * Simplify by using only schedulable status.
>  * Increase timeout to 20ms.
> 
> v4:
>  * Fix live_execlists selftest.
> 
> v5:
>  * Fix logic in kill_engines.
> 
> v6:
>  * Rebase.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhen Han <zhen.han@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +++++++++++++------
>  drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
>  drivers/gpu/drm/i915/gt/intel_context.h       | 17 +++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
>  .../drm/i915/gt/intel_execlists_submission.c  | 11 ++++++++--
>  drivers/gpu/drm/i915/gt/selftest_execlists.c  | 20 +++++++++++------
>  drivers/gpu/drm/i915/i915_request.c           |  2 +-
>  7 files changed, 57 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index cff72679ad7c..21fe5d4057ab 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>  	return engine;
>  }
>  
> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
> +static void
> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>  {
>  	struct i915_gem_engines_iter it;
>  	struct intel_context *ce;
> @@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  	 */
>  	for_each_gem_engine(ce, engines, it) {
>  		struct intel_engine_cs *engine;
> +		bool skip = false;
> +
> +		if (ban)
> +			skip = intel_context_ban(ce, NULL);
> +		else if (!persistent)
> +			skip = !intel_context_clear_schedulable(ce);

schedulable doesn't hook into the backend at all, while
intel_context_ban does. In the case of GuC submission, intel_context_ban
changes the preemption timeout to 1 us and disables scheduling, resulting
in the context getting kicked off the hardware immediately. You likely
need to update intel_context_clear_schedulable to use the same vfunc as
intel_context_ban() but accept an argument for the value of the
preemption timeout. For a ban use a lower value; for clearing
schedulable use a higher value.

>  
> -		if (ban && intel_context_ban(ce, NULL))
> +		/* Already previously banned or made non-schedulable? */
> +		if (skip)
>  			continue;
>  
>  		/*
> @@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  		engine = active_engine(ce);
>  
>  		/* First attempt to gracefully cancel the context */
> -		if (engine && !__cancel_engine(engine) && ban)
> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>  			/*
>  			 * If we are unable to send a preemptive pulse to bump
>  			 * the context from the GPU, we have to resort to a full
> @@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  
>  static void kill_context(struct i915_gem_context *ctx)
>  {
> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
> -		    !ctx->i915->params.enable_hangcheck);
>  	struct i915_gem_engines *pos, *next;
>  
>  	spin_lock_irq(&ctx->stale.lock);
> @@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx)
>  
>  		spin_unlock_irq(&ctx->stale.lock);
>  
> -		kill_engines(pos, ban);
> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  		spin_lock_irq(&ctx->stale.lock);
>  		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
> @@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>  
>  kill:
>  	if (list_empty(&engines->link)) /* raced, already closed */
> -		kill_engines(engines, true);
> +		kill_engines(engines, true,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  	i915_sw_fence_commit(&engines->fence);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 745e84c72c90..bc1701ef1578 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>  	ce->ring = NULL;
>  	ce->ring_size = SZ_4K;
>  
> +	__set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
> +
>  	ewma_runtime_init(&ce->runtime.avg);
>  
>  	ce->vm = i915_vm_get(engine->gt->vm);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index c41098950746..5b50716654dd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce)
>  
>  static inline bool intel_context_set_banned(struct intel_context *ce)
>  {
> -	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
> +	bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags);
> +
> +	if (!banned)
> +		clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
> +
> +	return banned;
> +}
> +
> +static inline bool intel_context_clear_schedulable(struct intel_context *ce)
> +{
> +	return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
> +}
> +
> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
> +{
> +	return test_bit(CONTEXT_SCHEDULABLE, &ce->flags);
>  }
>  
>  static inline bool intel_context_ban(struct intel_context *ce,
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index e54351a170e2..3306c70c9c54 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -112,6 +112,7 @@ struct intel_context {
>  #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>  #define CONTEXT_NOPREEMPT		8
>  #define CONTEXT_LRCA_DIRTY		9
> +#define CONTEXT_SCHEDULABLE		10  /* Unless banned or non-persistent closed. */
>  
>  	struct {
>  		u64 timeout_us;
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index de5f9c86b9a4..778f3cda3c71 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>  		     !intel_engine_has_heartbeat(engine)))
>  		intel_context_set_banned(ce);
>  
> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>  		reset_active(rq, engine);
>  
>  	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> @@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>  static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>  					    const struct i915_request *rq)
>  {
> +	struct intel_context *ce;
> +
>  	if (!rq)
>  		return 0;
>  
> +	ce = rq->context;
> +
>  	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>  		return 1;
> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
> +	else if (unlikely(!intel_context_is_schedulable(ce)))
> +		return 20;

Likely want a define for '1' and '20' too.

Matt

>  
>  	return READ_ONCE(engine->props.preempt_timeout_ms);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> index f12ffe797639..da36c015caf4 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
> @@ -2050,6 +2050,12 @@ struct live_preempt_cancel {
>  	struct preempt_client a, b;
>  };
>  
> +static void context_clear_banned(struct intel_context *ce)
> +{
> +	clear_bit(CONTEXT_BANNED, &ce->flags);
> +	set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
> +}
> +
>  static int __cancel_active0(struct live_preempt_cancel *arg)
>  {
>  	struct i915_request *rq;
> @@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
>  	if (IS_ERR(rq))
>  		return PTR_ERR(rq);
>  
> -	clear_bit(CONTEXT_BANNED, &rq->context->flags);
> +	context_clear_banned(rq->context);
>  	i915_request_get(rq);
>  	i915_request_add(rq);
>  	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> @@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
>  	if (IS_ERR(rq[0]))
>  		return PTR_ERR(rq[0]);
>  
> -	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
> +	context_clear_banned(rq[0]->context);
>  	i915_request_get(rq[0]);
>  	i915_request_add(rq[0]);
>  	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> @@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
>  		goto out;
>  	}
>  
> -	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
> +	context_clear_banned(rq[1]->context);
>  	i915_request_get(rq[1]);
>  	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
>  	i915_request_add(rq[1]);
> @@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
>  	if (IS_ERR(rq[0]))
>  		return PTR_ERR(rq[0]);
>  
> -	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
> +	context_clear_banned(rq[0]->context);
>  	i915_request_get(rq[0]);
>  	i915_request_add(rq[0]);
>  	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
> @@ -2197,7 +2203,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
>  		goto out;
>  	}
>  
> -	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
> +	context_clear_banned(rq[1]->context);
>  	i915_request_get(rq[1]);
>  	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
>  	i915_request_add(rq[1]);
> @@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
>  	if (IS_ERR(rq))
>  		return PTR_ERR(rq);
>  
> -	clear_bit(CONTEXT_BANNED, &rq->context->flags);
> +	context_clear_banned(rq->context);
>  	i915_request_get(rq);
>  	i915_request_add(rq);
>  	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> @@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
>  	if (IS_ERR(rq))
>  		return PTR_ERR(rq);
>  
> -	clear_bit(CONTEXT_BANNED, &rq->context->flags);
> +	context_clear_banned(rq->context);
>  	i915_request_get(rq);
>  	i915_request_add(rq);
>  	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index ce446716d092..b1a9bec83339 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request)
>  		goto active;
>  	}
>  
> -	if (unlikely(intel_context_is_banned(request->context)))
> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>  		i915_request_set_error_once(request, -EIO);
>  
>  	if (unlikely(fatal_error(request->fence.error)))
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts
@ 2021-08-05 12:05 Tvrtko Ursulin
  2021-08-05 16:32 ` Matthew Brost
  0 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2021-08-05 12:05 UTC (permalink / raw)
  To: Intel-gfx; +Cc: dri-devel, Tvrtko Ursulin, Chris Wilson, Zhen Han

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the left over job
has to preempt before we issue an engine reset.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferable.

To achieve this the patch splits handling of banned contexts from simply
closed non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

20ms preempt timeout grace is given to exited non-persistent contexts,
which has been empirically tested to satisfy customers' requirements
and still provides reasonably quick cleanup post exit.

v2:
 * Streamline fast path checks.

v3:
 * Simplify by using only schedulable status.
 * Increase timeout to 20ms.

v4:
 * Fix live_execlists selftest.

v5:
 * Fix logic in kill_engines.

v6:
 * Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 22 +++++++++++++------
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_context.h       | 17 +++++++++++++-
 drivers/gpu/drm/i915/gt/intel_context_types.h |  1 +
 .../drm/i915/gt/intel_execlists_submission.c  | 11 ++++++++--
 drivers/gpu/drm/i915/gt/selftest_execlists.c  | 20 +++++++++++------
 drivers/gpu/drm/i915/i915_request.c           |  2 +-
 7 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index cff72679ad7c..21fe5d4057ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1065,7 +1065,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1079,8 +1080,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool skip = false;
+
+		if (ban)
+			skip = intel_context_ban(ce, NULL);
+		else if (!persistent)
+			skip = !intel_context_clear_schedulable(ce);
 
-		if (ban && intel_context_ban(ce, NULL))
+		/* Already previously banned or made non-schedulable? */
+		if (skip)
 			continue;
 
 		/*
@@ -1093,7 +1101,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -1105,8 +1113,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -1119,7 +1125,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -1165,7 +1172,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 745e84c72c90..bc1701ef1578 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -382,6 +382,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 	ce->ring = NULL;
 	ce->ring_size = SZ_4K;
 
+	__set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+
 	ewma_runtime_init(&ce->runtime.avg);
 
 	ce->vm = i915_vm_get(engine->gt->vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index c41098950746..5b50716654dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -251,7 +251,22 @@ static inline bool intel_context_is_banned(const struct intel_context *ce)
 
 static inline bool intel_context_set_banned(struct intel_context *ce)
 {
-	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
+	bool banned = test_and_set_bit(CONTEXT_BANNED, &ce->flags);
+
+	if (!banned)
+		clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+
+	return banned;
+}
+
+static inline bool intel_context_clear_schedulable(struct intel_context *ce)
+{
+	return test_and_clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+}
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_SCHEDULABLE, &ce->flags);
 }
 
 static inline bool intel_context_ban(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2..3306c70c9c54 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -112,6 +112,7 @@ struct intel_context {
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
 #define CONTEXT_LRCA_DIRTY		9
+#define CONTEXT_SCHEDULABLE		10  /* Unless banned or non-persistent closed. */
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a4..778f3cda3c71 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1222,12 +1222,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
 		return 1;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(!intel_context_is_schedulable(ce)))
+		return 20;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index f12ffe797639..da36c015caf4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -2050,6 +2050,12 @@ struct live_preempt_cancel {
 	struct preempt_client a, b;
 };
 
+static void context_clear_banned(struct intel_context *ce)
+{
+	clear_bit(CONTEXT_BANNED, &ce->flags);
+	set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+}
+
 static int __cancel_active0(struct live_preempt_cancel *arg)
 {
 	struct i915_request *rq;
@@ -2068,7 +2074,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
-	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	context_clear_banned(rq->context);
 	i915_request_get(rq);
 	i915_request_add(rq);
 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
@@ -2112,7 +2118,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
 	if (IS_ERR(rq[0]))
 		return PTR_ERR(rq[0]);
 
-	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+	context_clear_banned(rq[0]->context);
 	i915_request_get(rq[0]);
 	i915_request_add(rq[0]);
 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
@@ -2128,7 +2134,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
 		goto out;
 	}
 
-	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+	context_clear_banned(rq[1]->context);
 	i915_request_get(rq[1]);
 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 	i915_request_add(rq[1]);
@@ -2183,7 +2189,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
 	if (IS_ERR(rq[0]))
 		return PTR_ERR(rq[0]);
 
-	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+	context_clear_banned(rq[0]->context);
 	i915_request_get(rq[0]);
 	i915_request_add(rq[0]);
 	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
@@ -2197,7 +2203,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
 		goto out;
 	}
 
-	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+	context_clear_banned(rq[1]->context);
 	i915_request_get(rq[1]);
 	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
 	i915_request_add(rq[1]);
@@ -2273,7 +2279,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
-	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	context_clear_banned(rq->context);
 	i915_request_get(rq);
 	i915_request_add(rq);
 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
@@ -2329,7 +2335,7 @@ static int __cancel_fail(struct live_preempt_cancel *arg)
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
-	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	context_clear_banned(rq->context);
 	i915_request_get(rq);
 	i915_request_add(rq);
 	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ce446716d092..b1a9bec83339 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -583,7 +583,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-02-25 15:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-24 13:03 [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin
2022-02-24 23:37 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915: Be more gentle when exiting non-persistent contexts (rev3) Patchwork
2022-02-25  0:05 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-02-25 15:29 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2021-08-26 10:52 [Intel-gfx] [PATCH] drm/i915: Be more gentle when exiting non-persistent contexts Tvrtko Ursulin
2021-08-26 13:32 ` Daniel Vetter
2021-08-05 12:05 Tvrtko Ursulin
2021-08-05 16:32 ` Matthew Brost
2021-08-06 10:44   ` Tvrtko Ursulin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).