All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Be more gentle with exiting non-persistent context
@ 2021-04-29  9:46 ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2021-04-29  9:46 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Zhen Han, Chris Wilson, dri-devel, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the left over job
has to preempt before we issues an engine resets.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferrable.

To achieve this the patch splits handling of banned contexts from simply
exited non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

15ms preempt timeout grace is given to exited non-persistent contexts
which have been empirically tested to satisfy customers requirements
and still provides reasonably quick cleanup post exit.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
 drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
 .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_request.c             |  2 +-
 5 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index fd8ee52e17a4..5a6eba1232cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 		if (ban && intel_context_set_banned(ce))
 			continue;
+		else if (!persistent && intel_context_set_non_persistent(ce))
+			continue;
 
 		/*
 		 * Check the current active state of this context; if we
@@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39f..b0e812b8ce39 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
+static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool intel_context_set_non_persistent(struct intel_context *ce)
+{
+	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
+{
+	return !intel_context_is_banned(ce) &&
+	       !(intel_context_is_closed(ce) &&
+	         intel_context_is_non_persistent(ce));
+}
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346..aa949c539e93 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -95,6 +95,7 @@ struct intel_context {
 #define CONTEXT_BANNED			6
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
+#define CONTEXT_NON_PERSISTENT		9 /* Only if also closed. */
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de124870af44..5ad7272fbbc4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
 		return 1;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(intel_context_is_closed(ce) &&
+			  intel_context_is_non_persistent(ce)))
+		return 15;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index bec9c3652188..bbac87535923 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.30.2

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
@ 2021-04-29  9:46 ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2021-04-29  9:46 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Zhen Han, Chris Wilson, dri-devel

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the left over job
has to preempt before we issues an engine resets.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferrable.

To achieve this the patch splits handling of banned contexts from simply
exited non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

15ms preempt timeout grace is given to exited non-persistent contexts
which have been empirically tested to satisfy customers requirements
and still provides reasonably quick cleanup post exit.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
 drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
 .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_request.c             |  2 +-
 5 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index fd8ee52e17a4..5a6eba1232cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 		if (ban && intel_context_set_banned(ce))
 			continue;
+		else if (!persistent && intel_context_set_non_persistent(ce))
+			continue;
 
 		/*
 		 * Check the current active state of this context; if we
@@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39f..b0e812b8ce39 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
+static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool intel_context_set_non_persistent(struct intel_context *ce)
+{
+	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
+{
+	return !intel_context_is_banned(ce) &&
+	       !(intel_context_is_closed(ce) &&
+	         intel_context_is_non_persistent(ce));
+}
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346..aa949c539e93 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -95,6 +95,7 @@ struct intel_context {
 #define CONTEXT_BANNED			6
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
+#define CONTEXT_NON_PERSISTENT		9 /* Only if also closed. */
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de124870af44..5ad7272fbbc4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
 		return 1;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(intel_context_is_closed(ce) &&
+			  intel_context_is_non_persistent(ce)))
+		return 15;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index bec9c3652188..bbac87535923 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.30.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Be more gentle with exiting non-persistent context
  2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
  (?)
@ 2021-04-29 15:13 ` Patchwork
  -1 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2021-04-29 15:13 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Be more gentle with exiting non-persistent context
URL   : https://patchwork.freedesktop.org/series/89644/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
6d335c292ae3 drm/i915: Be more gentle with exiting non-persistent context
-:113: ERROR:CODE_INDENT: code indent should use tabs where possible
#113: FILE: drivers/gpu/drm/i915/gt/intel_context.h:237:
+^I         intel_context_is_non_persistent(ce));$

total: 1 errors, 0 warnings, 0 checks, 118 lines checked


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Be more gentle with exiting non-persistent context
  2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
  (?)
  (?)
@ 2021-04-29 15:43 ` Patchwork
  -1 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2021-04-29 15:43 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 4336 bytes --]

== Series Details ==

Series: drm/i915: Be more gentle with exiting non-persistent context
URL   : https://patchwork.freedesktop.org/series/89644/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_10027 -> Patchwork_20032
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/index.html

Known issues
------------

  Here are the changes found in Patchwork_20032 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_prime@amd-to-i915:
    - fi-tgl-y:           NOTRUN -> [SKIP][1] ([fdo#109315] / [i915#2575])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-tgl-y/igt@amdgpu/amd_prime@amd-to-i915.html

  * igt@gem_huc_copy@huc-copy:
    - fi-skl-guc:         NOTRUN -> [SKIP][2] ([fdo#109271] / [i915#2190])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-skl-guc/igt@gem_huc_copy@huc-copy.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-skl-guc:         NOTRUN -> [SKIP][3] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-skl-guc/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - fi-skl-guc:         NOTRUN -> [SKIP][4] ([fdo#109271] / [i915#533])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-skl-guc/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_psr@primary_mmap_gtt:
    - fi-skl-guc:         NOTRUN -> [SKIP][5] ([fdo#109271]) +26 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-skl-guc/igt@kms_psr@primary_mmap_gtt.html

  * igt@runner@aborted:
    - fi-bdw-5557u:       NOTRUN -> [FAIL][6] ([i915#1602] / [i915#2029])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-bdw-5557u/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-tgl-u2:          [FAIL][7] ([i915#1888]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/fi-tgl-u2/igt@gem_exec_suspend@basic-s3.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-tgl-u2/igt@gem_exec_suspend@basic-s3.html

  * igt@kms_frontbuffer_tracking@basic:
    - {fi-rkl-11500t}:    [SKIP][9] ([i915#1849] / [i915#3180]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/fi-rkl-11500t/igt@kms_frontbuffer_tracking@basic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/fi-rkl-11500t/igt@kms_frontbuffer_tracking@basic.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1602]: https://gitlab.freedesktop.org/drm/intel/issues/1602
  [i915#1849]: https://gitlab.freedesktop.org/drm/intel/issues/1849
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#2029]: https://gitlab.freedesktop.org/drm/intel/issues/2029
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2575]: https://gitlab.freedesktop.org/drm/intel/issues/2575
  [i915#3180]: https://gitlab.freedesktop.org/drm/intel/issues/3180
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533


Participating hosts (44 -> 41)
------------------------------

  Additional (1): fi-skl-guc 
  Missing    (4): fi-ilk-m540 fi-bsw-cyan fi-bdw-samus fi-hsw-4200u 


Build changes
-------------

  * Linux: CI_DRM_10027 -> Patchwork_20032

  CI-20190529: 20190529
  CI_DRM_10027: 1748cb1e8bdf543570b86f39487b171ad4c1f896 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6077: 126a3f6fc0e97786e2819085efc84e741093aed5 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_20032: 6d335c292ae34e8239dd42e1f358c0621b8f685b @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

6d335c292ae3 drm/i915: Be more gentle with exiting non-persistent context

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/index.html

[-- Attachment #1.2: Type: text/html, Size: 5335 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915: Be more gentle with exiting non-persistent context
  2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  (?)
@ 2021-04-29 18:41 ` Patchwork
  -1 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2021-04-29 18:41 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 30283 bytes --]

== Series Details ==

Series: drm/i915: Be more gentle with exiting non-persistent context
URL   : https://patchwork.freedesktop.org/series/89644/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10027_full -> Patchwork_20032_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_20032_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_20032_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_20032_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_cursor_legacy@all-pipes-single-bo:
    - shard-iclb:         [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb6/igt@kms_cursor_legacy@all-pipes-single-bo.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb8/igt@kms_cursor_legacy@all-pipes-single-bo.html

  
Known issues
------------

  Here are the changes found in Patchwork_20032_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_create@create-clear:
    - shard-glk:          [PASS][3] -> [FAIL][4] ([i915#1888] / [i915#3160])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk8/igt@gem_create@create-clear.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk9/igt@gem_create@create-clear.html

  * igt@gem_create@create-massive:
    - shard-apl:          NOTRUN -> [DMESG-WARN][5] ([i915#3002])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl6/igt@gem_create@create-massive.html

  * igt@gem_ctx_persistence@legacy-engines-hostile@render:
    - shard-iclb:         [PASS][6] -> [FAIL][7] ([i915#2410])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb6/igt@gem_ctx_persistence@legacy-engines-hostile@render.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb7/igt@gem_ctx_persistence@legacy-engines-hostile@render.html

  * igt@gem_ctx_persistence@legacy-engines-queued:
    - shard-snb:          NOTRUN -> [SKIP][8] ([fdo#109271] / [i915#1099]) +2 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-snb5/igt@gem_ctx_persistence@legacy-engines-queued.html

  * igt@gem_ctx_ringsize@active@bcs0:
    - shard-skl:          NOTRUN -> [INCOMPLETE][9] ([i915#3316])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl7/igt@gem_ctx_ringsize@active@bcs0.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-glk:          [PASS][10] -> [FAIL][11] ([i915#2846])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk2/igt@gem_exec_fair@basic-deadline.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk9/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-flow@rcs0:
    - shard-tglb:         [PASS][12] -> [FAIL][13] ([i915#2842])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-tglb2/igt@gem_exec_fair@basic-flow@rcs0.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-tglb7/igt@gem_exec_fair@basic-flow@rcs0.html

  * igt@gem_exec_fair@basic-pace@vcs1:
    - shard-iclb:         [PASS][14] -> [FAIL][15] ([i915#2842]) +2 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb4/igt@gem_exec_fair@basic-pace@vcs1.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb4/igt@gem_exec_fair@basic-pace@vcs1.html

  * igt@gem_exec_fair@basic-pace@vecs0:
    - shard-kbl:          [PASS][16] -> [FAIL][17] ([i915#2842]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl7/igt@gem_exec_fair@basic-pace@vecs0.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl1/igt@gem_exec_fair@basic-pace@vecs0.html

  * igt@gem_exec_flush@basic-wb-ro-before-default:
    - shard-glk:          [PASS][18] -> [INCOMPLETE][19] ([i915#2944])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk9/igt@gem_exec_flush@basic-wb-ro-before-default.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk1/igt@gem_exec_flush@basic-wb-ro-before-default.html

  * igt@gem_mmap_gtt@cpuset-basic-small-copy-odd:
    - shard-glk:          [PASS][20] -> [FAIL][21] ([i915#307])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk6/igt@gem_mmap_gtt@cpuset-basic-small-copy-odd.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk4/igt@gem_mmap_gtt@cpuset-basic-small-copy-odd.html

  * igt@gem_mmap_gtt@cpuset-big-copy-odd:
    - shard-iclb:         [PASS][22] -> [FAIL][23] ([i915#307])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb6/igt@gem_mmap_gtt@cpuset-big-copy-odd.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb7/igt@gem_mmap_gtt@cpuset-big-copy-odd.html

  * igt@gem_userptr_blits@set-cache-level:
    - shard-apl:          NOTRUN -> [FAIL][24] ([i915#3324])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl6/igt@gem_userptr_blits@set-cache-level.html

  * igt@gem_userptr_blits@vma-merge:
    - shard-snb:          NOTRUN -> [FAIL][25] ([i915#2724])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-snb2/igt@gem_userptr_blits@vma-merge.html

  * igt@gen9_exec_parse@batch-invalid-length:
    - shard-snb:          NOTRUN -> [SKIP][26] ([fdo#109271]) +478 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-snb7/igt@gen9_exec_parse@batch-invalid-length.html

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-kbl:          NOTRUN -> [SKIP][27] ([fdo#109271] / [i915#658])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl1/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_suspend@sysfs-reader:
    - shard-kbl:          [PASS][28] -> [INCOMPLETE][29] ([i915#155])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl7/igt@i915_suspend@sysfs-reader.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl3/igt@i915_suspend@sysfs-reader.html

  * igt@kms_big_fb@yf-tiled-addfb:
    - shard-tglb:         NOTRUN -> [SKIP][30] ([fdo#111615])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-tglb6/igt@kms_big_fb@yf-tiled-addfb.html

  * igt@kms_ccs@pipe-c-ccs-on-another-bo:
    - shard-skl:          NOTRUN -> [SKIP][31] ([fdo#109271] / [fdo#111304])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl5/igt@kms_ccs@pipe-c-ccs-on-another-bo.html

  * igt@kms_chamelium@vga-hpd-for-each-pipe:
    - shard-skl:          NOTRUN -> [SKIP][32] ([fdo#109271] / [fdo#111827]) +13 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl7/igt@kms_chamelium@vga-hpd-for-each-pipe.html

  * igt@kms_color_chamelium@pipe-a-gamma:
    - shard-tglb:         NOTRUN -> [SKIP][33] ([fdo#109284] / [fdo#111827])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-tglb6/igt@kms_color_chamelium@pipe-a-gamma.html

  * igt@kms_color_chamelium@pipe-b-gamma:
    - shard-apl:          NOTRUN -> [SKIP][34] ([fdo#109271] / [fdo#111827]) +17 similar issues
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl8/igt@kms_color_chamelium@pipe-b-gamma.html

  * igt@kms_color_chamelium@pipe-c-ctm-0-25:
    - shard-kbl:          NOTRUN -> [SKIP][35] ([fdo#109271] / [fdo#111827]) +1 similar issue
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl1/igt@kms_color_chamelium@pipe-c-ctm-0-25.html

  * igt@kms_color_chamelium@pipe-d-degamma:
    - shard-glk:          NOTRUN -> [SKIP][36] ([fdo#109271] / [fdo#111827]) +4 similar issues
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk6/igt@kms_color_chamelium@pipe-d-degamma.html

  * igt@kms_color_chamelium@pipe-invalid-ctm-matrix-sizes:
    - shard-snb:          NOTRUN -> [SKIP][37] ([fdo#109271] / [fdo#111827]) +25 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-snb5/igt@kms_color_chamelium@pipe-invalid-ctm-matrix-sizes.html

  * igt@kms_content_protection@atomic-dpms:
    - shard-apl:          NOTRUN -> [TIMEOUT][38] ([i915#1319]) +1 similar issue
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl7/igt@kms_content_protection@atomic-dpms.html

  * igt@kms_cursor_crc@pipe-a-cursor-32x32-rapid-movement:
    - shard-tglb:         NOTRUN -> [SKIP][39] ([i915#3319])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-tglb6/igt@kms_cursor_crc@pipe-a-cursor-32x32-rapid-movement.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-apl:          [PASS][40] -> [DMESG-WARN][41] ([i915#180])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-apl6/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl6/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions:
    - shard-skl:          [PASS][42] -> [FAIL][43] ([i915#2346])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl2/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl2/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html

  * igt@kms_flip@2x-flip-vs-modeset-vs-hang:
    - shard-kbl:          NOTRUN -> [SKIP][44] ([fdo#109271]) +25 similar issues
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl6/igt@kms_flip@2x-flip-vs-modeset-vs-hang.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1:
    - shard-skl:          NOTRUN -> [FAIL][45] ([i915#79])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl7/igt@kms_flip@flip-vs-expired-vblank-interruptible@c-edp1.html

  * igt@kms_flip@flip-vs-suspend@b-edp1:
    - shard-skl:          [PASS][46] -> [INCOMPLETE][47] ([i915#198])
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl6/igt@kms_flip@flip-vs-suspend@b-edp1.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl4/igt@kms_flip@flip-vs-suspend@b-edp1.html

  * igt@kms_flip@plain-flip-fb-recreate-interruptible@b-edp1:
    - shard-skl:          [PASS][48] -> [FAIL][49] ([i915#2122])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl5/igt@kms_flip@plain-flip-fb-recreate-interruptible@b-edp1.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl10/igt@kms_flip@plain-flip-fb-recreate-interruptible@b-edp1.html

  * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs:
    - shard-skl:          NOTRUN -> [SKIP][50] ([fdo#109271] / [i915#2672])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl5/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs.html
    - shard-apl:          NOTRUN -> [SKIP][51] ([fdo#109271] / [i915#2672])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl3/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-indfb-draw-blt:
    - shard-skl:          NOTRUN -> [SKIP][52] ([fdo#109271]) +135 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl6/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-indfb-draw-blt.html

  * igt@kms_frontbuffer_tracking@psr-rgb565-draw-mmap-cpu:
    - shard-glk:          NOTRUN -> [SKIP][53] ([fdo#109271]) +28 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk6/igt@kms_frontbuffer_tracking@psr-rgb565-draw-mmap-cpu.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          NOTRUN -> [FAIL][54] ([i915#1188])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl5/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
    - shard-apl:          NOTRUN -> [SKIP][55] ([fdo#109271] / [i915#533]) +1 similar issue
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl2/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d.html

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-d:
    - shard-skl:          NOTRUN -> [SKIP][56] ([fdo#109271] / [i915#533]) +1 similar issue
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl1/igt@kms_pipe_crc_basic@hang-read-crc-pipe-d.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-d:
    - shard-glk:          NOTRUN -> [SKIP][57] ([fdo#109271] / [i915#533])
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk6/igt@kms_pipe_crc_basic@read-crc-pipe-d.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-kbl:          NOTRUN -> [DMESG-WARN][58] ([i915#180])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl6/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [PASS][59] -> [FAIL][60] ([fdo#108145] / [i915#265])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl9/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl9/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
    - shard-apl:          NOTRUN -> [FAIL][61] ([fdo#108145] / [i915#265]) +1 similar issue
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl8/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          NOTRUN -> [FAIL][62] ([fdo#108145] / [i915#265]) +1 similar issue
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl5/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-transparent-fb:
    - shard-apl:          NOTRUN -> [FAIL][63] ([i915#265]) +1 similar issue
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl7/igt@kms_plane_alpha_blend@pipe-c-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-max:
    - shard-glk:          NOTRUN -> [FAIL][64] ([fdo#108145] / [i915#265])
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk6/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-max.html

  * igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping:
    - shard-apl:          NOTRUN -> [SKIP][65] ([fdo#109271] / [i915#2733])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl6/igt@kms_plane_scaling@scaler-with-clipping-clamping@pipe-c-scaler-with-clipping-clamping.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-3:
    - shard-apl:          NOTRUN -> [SKIP][66] ([fdo#109271] / [i915#658]) +4 similar issues
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl7/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-3.html

  * igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4:
    - shard-skl:          NOTRUN -> [SKIP][67] ([fdo#109271] / [i915#658]) +3 similar issues
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl1/igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-0:
    - shard-glk:          NOTRUN -> [SKIP][68] ([fdo#109271] / [i915#658])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk6/igt@kms_psr2_sf@plane-move-sf-dmg-area-0.html

  * igt@kms_psr@psr2_sprite_render:
    - shard-tglb:         NOTRUN -> [FAIL][69] ([i915#132])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-tglb6/igt@kms_psr@psr2_sprite_render.html

  * igt@kms_psr@psr2_suspend:
    - shard-iclb:         [PASS][70] -> [SKIP][71] ([fdo#109441]) +1 similar issue
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb2/igt@kms_psr@psr2_suspend.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb1/igt@kms_psr@psr2_suspend.html

  * igt@kms_vblank@pipe-d-wait-forked-hang:
    - shard-apl:          NOTRUN -> [SKIP][72] ([fdo#109271]) +201 similar issues
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl7/igt@kms_vblank@pipe-d-wait-forked-hang.html

  * igt@kms_writeback@writeback-fb-id:
    - shard-skl:          NOTRUN -> [SKIP][73] ([fdo#109271] / [i915#2437])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl1/igt@kms_writeback@writeback-fb-id.html

  * igt@kms_writeback@writeback-pixel-formats:
    - shard-apl:          NOTRUN -> [SKIP][74] ([fdo#109271] / [i915#2437])
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl2/igt@kms_writeback@writeback-pixel-formats.html

  * igt@prime_vgem@sync@rcs0:
    - shard-skl:          [PASS][75] -> [INCOMPLETE][76] ([i915#409])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl5/igt@prime_vgem@sync@rcs0.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl10/igt@prime_vgem@sync@rcs0.html

  * igt@sysfs_clients@fair-0:
    - shard-skl:          NOTRUN -> [SKIP][77] ([fdo#109271] / [i915#2994]) +1 similar issue
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl9/igt@sysfs_clients@fair-0.html

  * igt@sysfs_clients@fair-7:
    - shard-apl:          NOTRUN -> [SKIP][78] ([fdo#109271] / [i915#2994])
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl2/igt@sysfs_clients@fair-7.html

  * igt@sysfs_clients@split-50:
    - shard-kbl:          NOTRUN -> [SKIP][79] ([fdo#109271] / [i915#2994]) +1 similar issue
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl6/igt@sysfs_clients@split-50.html

  
#### Possible fixes ####

  * igt@gem_eio@in-flight-internal-immediate:
    - shard-skl:          [DMESG-WARN][80] -> [PASS][81]
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl6/igt@gem_eio@in-flight-internal-immediate.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl4/igt@gem_eio@in-flight-internal-immediate.html

  * igt@gem_exec_fair@basic-pace@rcs0:
    - shard-tglb:         [FAIL][82] ([i915#2842]) -> [PASS][83]
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-tglb8/igt@gem_exec_fair@basic-pace@rcs0.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-tglb2/igt@gem_exec_fair@basic-pace@rcs0.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-glk:          [FAIL][84] ([i915#2842]) -> [PASS][85]
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk6/igt@gem_exec_fair@basic-throttle@rcs0.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk3/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_mmap_gtt@cpuset-big-copy-xy:
    - shard-iclb:         [FAIL][86] ([i915#2428]) -> [PASS][87]
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb1/igt@gem_mmap_gtt@cpuset-big-copy-xy.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb3/igt@gem_mmap_gtt@cpuset-big-copy-xy.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-apl:          [DMESG-WARN][88] ([i915#180]) -> [PASS][89]
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-apl8/igt@gem_workarounds@suspend-resume-context.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl3/igt@gem_workarounds@suspend-resume-context.html

  * igt@kms_cursor_edge_walk@pipe-c-64x64-top-edge:
    - shard-skl:          [DMESG-WARN][90] ([i915#1982]) -> [PASS][91]
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl9/igt@kms_cursor_edge_walk@pipe-c-64x64-top-edge.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl1/igt@kms_cursor_edge_walk@pipe-c-64x64-top-edge.html

  * igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ac-hdmi-a1-hdmi-a2:
    - shard-glk:          [FAIL][92] ([i915#79]) -> [PASS][93]
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk6/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ac-hdmi-a1-hdmi-a2.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk7/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ac-hdmi-a1-hdmi-a2.html

  * igt@kms_flip@flip-vs-suspend-interruptible@b-edp1:
    - shard-skl:          [INCOMPLETE][94] ([i915#198]) -> [PASS][95]
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl7/igt@kms_flip@flip-vs-suspend-interruptible@b-edp1.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl1/igt@kms_flip@flip-vs-suspend-interruptible@b-edp1.html

  * igt@kms_flip@flip-vs-suspend@a-dp1:
    - shard-kbl:          [INCOMPLETE][96] ([i915#155] / [i915#180]) -> [PASS][97]
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl6/igt@kms_flip@flip-vs-suspend@a-dp1.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl1/igt@kms_flip@flip-vs-suspend@a-dp1.html

  * igt@kms_flip@wf_vblank-ts-check-interruptible@a-edp1:
    - shard-skl:          [FAIL][98] ([i915#2122]) -> [PASS][99]
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl7/igt@kms_flip@wf_vblank-ts-check-interruptible@a-edp1.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl1/igt@kms_flip@wf_vblank-ts-check-interruptible@a-edp1.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [FAIL][100] ([fdo#108145] / [i915#265]) -> [PASS][101]
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl4/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [SKIP][102] ([fdo#109441]) -> [PASS][103]
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb1/igt@kms_psr@psr2_cursor_render.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb2/igt@kms_psr@psr2_cursor_render.html

  * igt@perf@blocking:
    - shard-skl:          [FAIL][104] ([i915#1542]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl8/igt@perf@blocking.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl10/igt@perf@blocking.html

  * igt@perf_pmu@rc6-suspend:
    - shard-kbl:          [DMESG-WARN][106] ([i915#180]) -> [PASS][107]
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl7/igt@perf_pmu@rc6-suspend.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl6/igt@perf_pmu@rc6-suspend.html

  
#### Warnings ####

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-iclb:         [FAIL][108] ([i915#2849]) -> [FAIL][109] ([i915#2842])
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb6/igt@gem_exec_fair@basic-throttle@rcs0.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb7/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-iclb:         [WARN][110] ([i915#2681] / [i915#2684]) -> [WARN][111] ([i915#1804] / [i915#2684])
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb1/igt@i915_pm_rc6_residency@rc6-idle.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb4/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1:
    - shard-iclb:         [SKIP][112] ([i915#658]) -> [SKIP][113] ([i915#2920]) +1 similar issue
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb3/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-3:
    - shard-iclb:         [SKIP][114] ([i915#2920]) -> [SKIP][115] ([i915#658]) +1 similar issue
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-iclb2/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-3.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-iclb1/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-3.html

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][116], [FAIL][117], [FAIL][118], [FAIL][119], [FAIL][120], [FAIL][121]) ([i915#180] / [i915#2292] / [i915#2505] / [i915#3002] / [i915#92]) -> ([FAIL][122], [FAIL][123], [FAIL][124], [FAIL][125], [FAIL][126]) ([i915#180] / [i915#1814] / [i915#2505] / [i915#3002] / [i915#92])
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl3/igt@runner@aborted.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl7/igt@runner@aborted.html
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl2/igt@runner@aborted.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl6/igt@runner@aborted.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl6/igt@runner@aborted.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-kbl2/igt@runner@aborted.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl7/igt@runner@aborted.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl4/igt@runner@aborted.html
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl2/igt@runner@aborted.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl4/igt@runner@aborted.html
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-kbl6/igt@runner@aborted.html
    - shard-apl:          ([FAIL][127], [FAIL][128]) ([fdo#109271] / [i915#180] / [i915#3002]) -> ([FAIL][129], [FAIL][130], [FAIL][131]) ([i915#1814] / [i915#3002])
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-apl8/igt@runner@aborted.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-apl8/igt@runner@aborted.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl6/igt@runner@aborted.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl8/igt@runner@aborted.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-apl6/igt@runner@aborted.html
    - shard-glk:          ([FAIL][132], [FAIL][133]) ([i915#3002] / [k.org#202321]) -> ([FAIL][134], [FAIL][135], [FAIL][136]) ([i915#2722] / [i915#3002] / [k.org#202321])
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk9/igt@runner@aborted.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-glk4/igt@runner@aborted.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk4/igt@runner@aborted.html
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk1/igt@runner@aborted.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-glk3/igt@runner@aborted.html
    - shard-skl:          ([FAIL][137], [FAIL][138], [FAIL][139], [FAIL][140], [FAIL][141], [FAIL][142]) ([i915#1436] / [i915#1814] / [i915#2029] / [i915#3002]) -> ([FAIL][143], [FAIL][144], [FAIL][145], [FAIL][146], [FAIL][147], [FAIL][148]) ([i915#1436] / [i915#1814] / [i915#2029] / [i915#2426] / [i915#3002] / [i915#409])
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl3/igt@runner@aborted.html
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl10/igt@runner@aborted.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl4/igt@runner@aborted.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl2/igt@runner@aborted.html
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl5/igt@runner@aborted.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10027/shard-skl3/igt@runner@aborted.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl2/igt@runner@aborted.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl5/igt@runner@aborted.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl2/igt@runner@aborted.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl10/igt@runner@aborted.html
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl2/igt@runner@aborted.html
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/shard-skl6/igt@runner@aborted.html

  
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109284]: https://bugs.freedesktop.org/show_bug.cgi?id=109284
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#111304]: https://bugs.freedesktop.org/show_bug.cgi?id=111304
  [fdo#111615]: https://bugs.freedesktop.org/show_bug.cgi?id=111615
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1099]: https://gitlab.freedesktop.org/drm/intel/issues/1099
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#132]: https://gitlab.freedesktop.org/drm/intel/issues/132
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#155]: https://gitlab.fre

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20032/index.html

[-- Attachment #1.2: Type: text/html, Size: 36019 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
  2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
@ 2021-04-30 11:48   ` Daniel Vetter
  -1 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2021-04-30 11:48 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx, Chris Wilson, dri-devel, Zhen Han

On Thu, Apr 29, 2021 at 10:46:40AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> When a non-persistent context exits we currently mark it as banned in
> order to trigger fast termination of any outstanding GPU jobs it may have
> left running.
> 
> In doing so we apply a very strict 1ms limit in which the left over job
> has to preempt before we issues an engine resets.
> 
> Some workloads are not able to cleanly preempt in that time window and it
> can be argued that it would instead be better to give them a bit more
> grace since avoiding engine resets is generally preferrable.

Can you pls explain here why this is preferrable?

> To achieve this the patch splits handling of banned contexts from simply
> exited non-persistent ones and then applies different timeouts for both
> and also extends the criteria which determines if a request should be
> scheduled back in after preemption or not.
> 
> 15ms preempt timeout grace is given to exited non-persistent contexts
> which have been empirically tested to satisfy customers requirements
> and still provides reasonably quick cleanup post exit.

Same here, a bit more detail on what exactly was the problem to be fixed
is needed.
-Daniel

> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhen Han <zhen.han@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
>  drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
>  .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
>  drivers/gpu/drm/i915/i915_request.c             |  2 +-
>  5 files changed, 38 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index fd8ee52e17a4..5a6eba1232cd 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>  	return engine;
>  }
>  
> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
> +static void
> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>  {
>  	struct i915_gem_engines_iter it;
>  	struct intel_context *ce;
> @@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  
>  		if (ban && intel_context_set_banned(ce))
>  			continue;
> +		else if (!persistent && intel_context_set_non_persistent(ce))
> +			continue;
>  
>  		/*
>  		 * Check the current active state of this context; if we
> @@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  		engine = active_engine(ce);
>  
>  		/* First attempt to gracefully cancel the context */
> -		if (engine && !__cancel_engine(engine) && ban)
> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>  			/*
>  			 * If we are unable to send a preemptive pulse to bump
>  			 * the context from the GPU, we have to resort to a full
> @@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  
>  static void kill_context(struct i915_gem_context *ctx)
>  {
> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
> -		    !ctx->i915->params.enable_hangcheck);
>  	struct i915_gem_engines *pos, *next;
>  
>  	spin_lock_irq(&ctx->stale.lock);
> @@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
>  
>  		spin_unlock_irq(&ctx->stale.lock);
>  
> -		kill_engines(pos, ban);
> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  		spin_lock_irq(&ctx->stale.lock);
>  		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
> @@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>  
>  kill:
>  	if (list_empty(&engines->link)) /* raced, already closed */
> -		kill_engines(engines, true);
> +		kill_engines(engines, true,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  	i915_sw_fence_commit(&engines->fence);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index f83a73a2b39f..b0e812b8ce39 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
>  	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>  }
>  
> +static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
> +{
> +	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> +}
> +
> +static inline bool intel_context_set_non_persistent(struct intel_context *ce)
> +{
> +	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> +}
> +
> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
> +{
> +	return !intel_context_is_banned(ce) &&
> +	       !(intel_context_is_closed(ce) &&
> +	         intel_context_is_non_persistent(ce));
> +}
> +
>  static inline bool
>  intel_context_force_single_submission(const struct intel_context *ce)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index ed8c447a7346..aa949c539e93 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -95,6 +95,7 @@ struct intel_context {
>  #define CONTEXT_BANNED			6
>  #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>  #define CONTEXT_NOPREEMPT		8
> +#define CONTEXT_NON_PERSISTENT		9 /* Only if also closed. */
>  
>  	struct {
>  		u64 timeout_us;
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index de124870af44..5ad7272fbbc4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>  		     !intel_engine_has_heartbeat(engine)))
>  		intel_context_set_banned(ce);
>  
> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>  		reset_active(rq, engine);
>  
>  	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> @@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>  static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>  					    const struct i915_request *rq)
>  {
> +	struct intel_context *ce;
> +
>  	if (!rq)
>  		return 0;
>  
> +	ce = rq->context;
> +
>  	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>  		return 1;
> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
> +	else if (unlikely(intel_context_is_closed(ce) &&
> +			  intel_context_is_non_persistent(ce)))
> +		return 15;
>  
>  	return READ_ONCE(engine->props.preempt_timeout_ms);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index bec9c3652188..bbac87535923 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
>  		goto active;
>  	}
>  
> -	if (unlikely(intel_context_is_banned(request->context)))
> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>  		i915_request_set_error_once(request, -EIO);
>  
>  	if (unlikely(fatal_error(request->fence.error)))
> -- 
> 2.30.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
@ 2021-04-30 11:48   ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2021-04-30 11:48 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx, Chris Wilson, dri-devel, Zhen Han

On Thu, Apr 29, 2021 at 10:46:40AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> When a non-persistent context exits we currently mark it as banned in
> order to trigger fast termination of any outstanding GPU jobs it may have
> left running.
> 
> In doing so we apply a very strict 1ms limit in which the left over job
> has to preempt before we issues an engine resets.
> 
> Some workloads are not able to cleanly preempt in that time window and it
> can be argued that it would instead be better to give them a bit more
> grace since avoiding engine resets is generally preferrable.

Can you pls explain here why this is preferrable?

> To achieve this the patch splits handling of banned contexts from simply
> exited non-persistent ones and then applies different timeouts for both
> and also extends the criteria which determines if a request should be
> scheduled back in after preemption or not.
> 
> 15ms preempt timeout grace is given to exited non-persistent contexts
> which have been empirically tested to satisfy customers requirements
> and still provides reasonably quick cleanup post exit.

Same here, a bit more detail on what exactly was the problem to be fixed
is needed.
-Daniel

> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhen Han <zhen.han@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
>  drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
>  .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
>  drivers/gpu/drm/i915/i915_request.c             |  2 +-
>  5 files changed, 38 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index fd8ee52e17a4..5a6eba1232cd 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>  	return engine;
>  }
>  
> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
> +static void
> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>  {
>  	struct i915_gem_engines_iter it;
>  	struct intel_context *ce;
> @@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  
>  		if (ban && intel_context_set_banned(ce))
>  			continue;
> +		else if (!persistent && intel_context_set_non_persistent(ce))
> +			continue;
>  
>  		/*
>  		 * Check the current active state of this context; if we
> @@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  		engine = active_engine(ce);
>  
>  		/* First attempt to gracefully cancel the context */
> -		if (engine && !__cancel_engine(engine) && ban)
> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>  			/*
>  			 * If we are unable to send a preemptive pulse to bump
>  			 * the context from the GPU, we have to resort to a full
> @@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>  
>  static void kill_context(struct i915_gem_context *ctx)
>  {
> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
> -		    !ctx->i915->params.enable_hangcheck);
>  	struct i915_gem_engines *pos, *next;
>  
>  	spin_lock_irq(&ctx->stale.lock);
> @@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
>  
>  		spin_unlock_irq(&ctx->stale.lock);
>  
> -		kill_engines(pos, ban);
> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  		spin_lock_irq(&ctx->stale.lock);
>  		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
> @@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>  
>  kill:
>  	if (list_empty(&engines->link)) /* raced, already closed */
> -		kill_engines(engines, true);
> +		kill_engines(engines, true,
> +			     i915_gem_context_is_persistent(ctx));
>  
>  	i915_sw_fence_commit(&engines->fence);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index f83a73a2b39f..b0e812b8ce39 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
>  	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>  }
>  
> +static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
> +{
> +	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> +}
> +
> +static inline bool intel_context_set_non_persistent(struct intel_context *ce)
> +{
> +	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> +}
> +
> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
> +{
> +	return !intel_context_is_banned(ce) &&
> +	       !(intel_context_is_closed(ce) &&
> +	         intel_context_is_non_persistent(ce));
> +}
> +
>  static inline bool
>  intel_context_force_single_submission(const struct intel_context *ce)
>  {
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index ed8c447a7346..aa949c539e93 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -95,6 +95,7 @@ struct intel_context {
>  #define CONTEXT_BANNED			6
>  #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>  #define CONTEXT_NOPREEMPT		8
> +#define CONTEXT_NON_PERSISTENT		9 /* Only if also closed. */
>  
>  	struct {
>  		u64 timeout_us;
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index de124870af44..5ad7272fbbc4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>  		     !intel_engine_has_heartbeat(engine)))
>  		intel_context_set_banned(ce);
>  
> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>  		reset_active(rq, engine);
>  
>  	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> @@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>  static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>  					    const struct i915_request *rq)
>  {
> +	struct intel_context *ce;
> +
>  	if (!rq)
>  		return 0;
>  
> +	ce = rq->context;
> +
>  	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>  		return 1;
> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
> +	else if (unlikely(intel_context_is_closed(ce) &&
> +			  intel_context_is_non_persistent(ce)))
> +		return 15;
>  
>  	return READ_ONCE(engine->props.preempt_timeout_ms);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index bec9c3652188..bbac87535923 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
>  		goto active;
>  	}
>  
> -	if (unlikely(intel_context_is_banned(request->context)))
> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>  		i915_request_set_error_once(request, -EIO);
>  
>  	if (unlikely(fatal_error(request->fence.error)))
> -- 
> 2.30.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
  2021-04-30 11:48   ` Daniel Vetter
@ 2021-04-30 13:27     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2021-04-30 13:27 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel-gfx, Chris Wilson, dri-devel, Zhen Han


On 30/04/2021 12:48, Daniel Vetter wrote:
> On Thu, Apr 29, 2021 at 10:46:40AM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> When a non-persistent context exits we currently mark it as banned in
>> order to trigger fast termination of any outstanding GPU jobs it may have
>> left running.
>>
>> In doing so we apply a very strict 1ms limit in which the left over job
>> has to preempt before we issues an engine resets.
>>
>> Some workloads are not able to cleanly preempt in that time window and it
>> can be argued that it would instead be better to give them a bit more
>> grace since avoiding engine resets is generally preferrable.
> 
> Can you pls explain here why this is preferrable?

I think there's always the risk of an innocent request getting axed with 
preempt to busy being very asynchronous and also engine reset can 
sometimes fail as well.

>> To achieve this the patch splits handling of banned contexts from simply
>> exited non-persistent ones and then applies different timeouts for both
>> and also extends the criteria which determines if a request should be
>> scheduled back in after preemption or not.
>>
>> 15ms preempt timeout grace is given to exited non-persistent contexts
>> which have been empirically tested to satisfy customers requirements
>> and still provides reasonably quick cleanup post exit.
> 
> Same here, a bit more detail on what exactly was the problem to be fixed
> is needed.

It is a bit multi-faceted. Start with how in some cultures errors 
messages are much bigger error flags than in others and much more 
difficult to hand-wave "oh that's not a problem really". The the 
previous considerations about why not avoid engine reset if we can. Add 
on top how non-persistent context exiting is not really an error worthy 
of a reset, *if* it exits cleanly reasonably quickly.

You could say make it clean up for itself before it exits, not a kernel 
problem. But on the balance of everything, to me it sounds saleable to 
just give it some longer time compared to banned contexts, which are the 
unquestionably naughty/dangerous ones. Also, how fast non-persistent 
contexts will be cleaned up hasn't been defined really. As long as 15ms 
is an order of magnitude, plus some, shorter than the normal preempt 
timeout I think it is fine.

Regards,

Tvrtko

P.S. Otherwise I plan to respin v2 with consolidated CONTEXT_SCHEDULABLE 
flag so fast paths do not have to do too many individual checks.

> -Daniel
> 
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Zhen Han <zhen.han@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
>>   drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
>>   drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
>>   .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
>>   drivers/gpu/drm/i915/i915_request.c             |  2 +-
>>   5 files changed, 38 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index fd8ee52e17a4..5a6eba1232cd 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>>   	return engine;
>>   }
>>   
>> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
>> +static void
>> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>>   {
>>   	struct i915_gem_engines_iter it;
>>   	struct intel_context *ce;
>> @@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   
>>   		if (ban && intel_context_set_banned(ce))
>>   			continue;
>> +		else if (!persistent && intel_context_set_non_persistent(ce))
>> +			continue;
>>   
>>   		/*
>>   		 * Check the current active state of this context; if we
>> @@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   		engine = active_engine(ce);
>>   
>>   		/* First attempt to gracefully cancel the context */
>> -		if (engine && !__cancel_engine(engine) && ban)
>> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>>   			/*
>>   			 * If we are unable to send a preemptive pulse to bump
>>   			 * the context from the GPU, we have to resort to a full
>> @@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   
>>   static void kill_context(struct i915_gem_context *ctx)
>>   {
>> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
>> -		    !ctx->i915->params.enable_hangcheck);
>>   	struct i915_gem_engines *pos, *next;
>>   
>>   	spin_lock_irq(&ctx->stale.lock);
>> @@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
>>   
>>   		spin_unlock_irq(&ctx->stale.lock);
>>   
>> -		kill_engines(pos, ban);
>> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
>> +			     i915_gem_context_is_persistent(ctx));
>>   
>>   		spin_lock_irq(&ctx->stale.lock);
>>   		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
>> @@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>>   
>>   kill:
>>   	if (list_empty(&engines->link)) /* raced, already closed */
>> -		kill_engines(engines, true);
>> +		kill_engines(engines, true,
>> +			     i915_gem_context_is_persistent(ctx));
>>   
>>   	i915_sw_fence_commit(&engines->fence);
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>> index f83a73a2b39f..b0e812b8ce39 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>> @@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
>>   	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>>   }
>>   
>> +static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
>> +{
>> +	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
>> +}
>> +
>> +static inline bool intel_context_set_non_persistent(struct intel_context *ce)
>> +{
>> +	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
>> +}
>> +
>> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
>> +{
>> +	return !intel_context_is_banned(ce) &&
>> +	       !(intel_context_is_closed(ce) &&
>> +	         intel_context_is_non_persistent(ce));
>> +}
>> +
>>   static inline bool
>>   intel_context_force_single_submission(const struct intel_context *ce)
>>   {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index ed8c447a7346..aa949c539e93 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -95,6 +95,7 @@ struct intel_context {
>>   #define CONTEXT_BANNED			6
>>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>>   #define CONTEXT_NOPREEMPT		8
>> +#define CONTEXT_NON_PERSISTENT		9 /* Only if also closed. */
>>   
>>   	struct {
>>   		u64 timeout_us;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index de124870af44..5ad7272fbbc4 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>>   		     !intel_engine_has_heartbeat(engine)))
>>   		intel_context_set_banned(ce);
>>   
>> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>>   		reset_active(rq, engine);
>>   
>>   	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>> @@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>>   static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>>   					    const struct i915_request *rq)
>>   {
>> +	struct intel_context *ce;
>> +
>>   	if (!rq)
>>   		return 0;
>>   
>> +	ce = rq->context;
>> +
>>   	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
>> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
>> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>>   		return 1;
>> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
>> +	else if (unlikely(intel_context_is_closed(ce) &&
>> +			  intel_context_is_non_persistent(ce)))
>> +		return 15;
>>   
>>   	return READ_ONCE(engine->props.preempt_timeout_ms);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index bec9c3652188..bbac87535923 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
>>   		goto active;
>>   	}
>>   
>> -	if (unlikely(intel_context_is_banned(request->context)))
>> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>>   		i915_request_set_error_once(request, -EIO);
>>   
>>   	if (unlikely(fatal_error(request->fence.error)))
>> -- 
>> 2.30.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
@ 2021-04-30 13:27     ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2021-04-30 13:27 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel-gfx, Chris Wilson, dri-devel, Zhen Han


On 30/04/2021 12:48, Daniel Vetter wrote:
> On Thu, Apr 29, 2021 at 10:46:40AM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> When a non-persistent context exits we currently mark it as banned in
>> order to trigger fast termination of any outstanding GPU jobs it may have
>> left running.
>>
>> In doing so we apply a very strict 1ms limit in which the left over job
>> has to preempt before we issues an engine resets.
>>
>> Some workloads are not able to cleanly preempt in that time window and it
>> can be argued that it would instead be better to give them a bit more
>> grace since avoiding engine resets is generally preferrable.
> 
> Can you pls explain here why this is preferrable?

I think there's always the risk of an innocent request getting axed with 
preempt to busy being very asynchronous and also engine reset can 
sometimes fail as well.

>> To achieve this the patch splits handling of banned contexts from simply
>> exited non-persistent ones and then applies different timeouts for both
>> and also extends the criteria which determines if a request should be
>> scheduled back in after preemption or not.
>>
>> 15ms preempt timeout grace is given to exited non-persistent contexts
>> which have been empirically tested to satisfy customers requirements
>> and still provides reasonably quick cleanup post exit.
> 
> Same here, a bit more detail on what exactly was the problem to be fixed
> is needed.

It is a bit multi-faceted. Start with how in some cultures errors 
messages are much bigger error flags than in others and much more 
difficult to hand-wave "oh that's not a problem really". The the 
previous considerations about why not avoid engine reset if we can. Add 
on top how non-persistent context exiting is not really an error worthy 
of a reset, *if* it exits cleanly reasonably quickly.

You could say make it clean up for itself before it exits, not a kernel 
problem. But on the balance of everything, to me it sounds saleable to 
just give it some longer time compared to banned contexts, which are the 
unquestionably naughty/dangerous ones. Also, how fast non-persistent 
contexts will be cleaned up hasn't been defined really. As long as 15ms 
is an order of magnitude, plus some, shorter than the normal preempt 
timeout I think it is fine.

Regards,

Tvrtko

P.S. Otherwise I plan to respin v2 with consolidated CONTEXT_SCHEDULABLE 
flag so fast paths do not have to do too many individual checks.

> -Daniel
> 
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Zhen Han <zhen.han@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
>>   drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
>>   drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
>>   .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
>>   drivers/gpu/drm/i915/i915_request.c             |  2 +-
>>   5 files changed, 38 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index fd8ee52e17a4..5a6eba1232cd 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
>>   	return engine;
>>   }
>>   
>> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
>> +static void
>> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
>>   {
>>   	struct i915_gem_engines_iter it;
>>   	struct intel_context *ce;
>> @@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   
>>   		if (ban && intel_context_set_banned(ce))
>>   			continue;
>> +		else if (!persistent && intel_context_set_non_persistent(ce))
>> +			continue;
>>   
>>   		/*
>>   		 * Check the current active state of this context; if we
>> @@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   		engine = active_engine(ce);
>>   
>>   		/* First attempt to gracefully cancel the context */
>> -		if (engine && !__cancel_engine(engine) && ban)
>> +		if (engine && !__cancel_engine(engine) && (ban || !persistent))
>>   			/*
>>   			 * If we are unable to send a preemptive pulse to bump
>>   			 * the context from the GPU, we have to resort to a full
>> @@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
>>   
>>   static void kill_context(struct i915_gem_context *ctx)
>>   {
>> -	bool ban = (!i915_gem_context_is_persistent(ctx) ||
>> -		    !ctx->i915->params.enable_hangcheck);
>>   	struct i915_gem_engines *pos, *next;
>>   
>>   	spin_lock_irq(&ctx->stale.lock);
>> @@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
>>   
>>   		spin_unlock_irq(&ctx->stale.lock);
>>   
>> -		kill_engines(pos, ban);
>> +		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
>> +			     i915_gem_context_is_persistent(ctx));
>>   
>>   		spin_lock_irq(&ctx->stale.lock);
>>   		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
>> @@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
>>   
>>   kill:
>>   	if (list_empty(&engines->link)) /* raced, already closed */
>> -		kill_engines(engines, true);
>> +		kill_engines(engines, true,
>> +			     i915_gem_context_is_persistent(ctx));
>>   
>>   	i915_sw_fence_commit(&engines->fence);
>>   }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
>> index f83a73a2b39f..b0e812b8ce39 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
>> @@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
>>   	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
>>   }
>>   
>> +static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
>> +{
>> +	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
>> +}
>> +
>> +static inline bool intel_context_set_non_persistent(struct intel_context *ce)
>> +{
>> +	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
>> +}
>> +
>> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
>> +{
>> +	return !intel_context_is_banned(ce) &&
>> +	       !(intel_context_is_closed(ce) &&
>> +	         intel_context_is_non_persistent(ce));
>> +}
>> +
>>   static inline bool
>>   intel_context_force_single_submission(const struct intel_context *ce)
>>   {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index ed8c447a7346..aa949c539e93 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -95,6 +95,7 @@ struct intel_context {
>>   #define CONTEXT_BANNED			6
>>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
>>   #define CONTEXT_NOPREEMPT		8
>> +#define CONTEXT_NON_PERSISTENT		9 /* Only if also closed. */
>>   
>>   	struct {
>>   		u64 timeout_us;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> index de124870af44..5ad7272fbbc4 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
>> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
>>   		     !intel_engine_has_heartbeat(engine)))
>>   		intel_context_set_banned(ce);
>>   
>> -	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>> +	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
>>   		reset_active(rq, engine);
>>   
>>   	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>> @@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
>>   static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
>>   					    const struct i915_request *rq)
>>   {
>> +	struct intel_context *ce;
>> +
>>   	if (!rq)
>>   		return 0;
>>   
>> +	ce = rq->context;
>> +
>>   	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
>> -	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
>> +	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
>>   		return 1;
>> +	/* Longer grace for closed non-persistent contexts to avoid resets. */
>> +	else if (unlikely(intel_context_is_closed(ce) &&
>> +			  intel_context_is_non_persistent(ce)))
>> +		return 15;
>>   
>>   	return READ_ONCE(engine->props.preempt_timeout_ms);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index bec9c3652188..bbac87535923 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
>>   		goto active;
>>   	}
>>   
>> -	if (unlikely(intel_context_is_banned(request->context)))
>> +	if (unlikely(!intel_context_is_schedulable(request->context)))
>>   		i915_request_set_error_once(request, -EIO);
>>   
>>   	if (unlikely(fatal_error(request->fence.error)))
>> -- 
>> 2.30.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2] drm/i915: Be more gentle with exiting non-persistent context
  2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
@ 2021-04-30 14:10   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2021-04-30 14:10 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Zhen Han, Chris Wilson, dri-devel, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the left over job
has to preempt before we issues an engine resets.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferrable.

To achieve this the patch splits handling of banned contexts from simply
exited non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

15ms preempt timeout grace is given to exited non-persistent contexts
which have been empirically tested to satisfy customers requirements
and still provides reasonably quick cleanup post exit.

v2:
 * Streamline fast path checks.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 16 +++++++++------
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_context.h       | 20 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 ++
 .../drm/i915/gt/intel_execlists_submission.c  | 11 ++++++++--
 drivers/gpu/drm/i915/i915_request.c           |  2 +-
 6 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index fd8ee52e17a4..090c891f029b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -443,6 +444,9 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 		if (ban && intel_context_set_banned(ce))
 			continue;
+		else if (!persistent &&
+			 intel_context_set_closed_non_persistent(ce))
+			continue;
 
 		/*
 		 * Check the current active state of this context; if we
@@ -454,7 +458,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -466,8 +470,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -480,7 +482,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -526,7 +529,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 17cf2640b082..efbda677a0a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -374,6 +374,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 	ce->sseu = engine->sseu;
 	ce->ring = __intel_context_ring_size(SZ_4K);
 
+	__set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+
 	ewma_runtime_init(&ce->runtime.avg);
 
 	ce->vm = i915_vm_get(engine->gt->vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39f..a6d4c2d7f2f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -217,9 +217,29 @@ static inline bool intel_context_is_banned(const struct intel_context *ce)
 
 static inline bool intel_context_set_banned(struct intel_context *ce)
 {
+	clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
+static inline bool
+intel_context_is_closed_non_persistent(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool
+intel_context_set_closed_non_persistent(struct intel_context *ce)
+{
+	GEM_BUG_ON(!intel_context_is_closed(ce));
+	clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+}
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346..2d07b821a20e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -95,6 +95,8 @@ struct intel_context {
 #define CONTEXT_BANNED			6
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
+#define CONTEXT_SCHEDULABLE		9  /* Unless banned or non-persistent closed. */
+#define CONTEXT_NON_PERSISTENT		10 /* Only if also closed. */
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de124870af44..3f3e2ee6a24d 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1204,12 +1204,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
 		return 1;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(intel_context_is_closed_non_persistent(ce)))
+		return 15;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index bec9c3652188..bbac87535923 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.30.2

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Intel-gfx] [PATCH v2] drm/i915: Be more gentle with exiting non-persistent context
@ 2021-04-30 14:10   ` Tvrtko Ursulin
  0 siblings, 0 replies; 14+ messages in thread
From: Tvrtko Ursulin @ 2021-04-30 14:10 UTC (permalink / raw)
  To: Intel-gfx; +Cc: Zhen Han, Chris Wilson, dri-devel

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

When a non-persistent context exits we currently mark it as banned in
order to trigger fast termination of any outstanding GPU jobs it may have
left running.

In doing so we apply a very strict 1ms limit in which the left over job
has to preempt before we issues an engine resets.

Some workloads are not able to cleanly preempt in that time window and it
can be argued that it would instead be better to give them a bit more
grace since avoiding engine resets is generally preferrable.

To achieve this the patch splits handling of banned contexts from simply
exited non-persistent ones and then applies different timeouts for both
and also extends the criteria which determines if a request should be
scheduled back in after preemption or not.

15ms preempt timeout grace is given to exited non-persistent contexts
which have been empirically tested to satisfy customers requirements
and still provides reasonably quick cleanup post exit.

v2:
 * Streamline fast path checks.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhen Han <zhen.han@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 16 +++++++++------
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_context.h       | 20 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 ++
 .../drm/i915/gt/intel_execlists_submission.c  | 11 ++++++++--
 drivers/gpu/drm/i915/i915_request.c           |  2 +-
 6 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index fd8ee52e17a4..090c891f029b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -443,6 +444,9 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 		if (ban && intel_context_set_banned(ce))
 			continue;
+		else if (!persistent &&
+			 intel_context_set_closed_non_persistent(ce))
+			continue;
 
 		/*
 		 * Check the current active state of this context; if we
@@ -454,7 +458,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (ban || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -466,8 +470,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -480,7 +482,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -526,7 +529,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 17cf2640b082..efbda677a0a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -374,6 +374,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 	ce->sseu = engine->sseu;
 	ce->ring = __intel_context_ring_size(SZ_4K);
 
+	__set_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+
 	ewma_runtime_init(&ce->runtime.avg);
 
 	ce->vm = i915_vm_get(engine->gt->vm);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39f..a6d4c2d7f2f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -217,9 +217,29 @@ static inline bool intel_context_is_banned(const struct intel_context *ce)
 
 static inline bool intel_context_set_banned(struct intel_context *ce)
 {
+	clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
+static inline bool
+intel_context_is_closed_non_persistent(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool
+intel_context_set_closed_non_persistent(struct intel_context *ce)
+{
+	GEM_BUG_ON(!intel_context_is_closed(ce));
+	clear_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+	return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
+}
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_SCHEDULABLE, &ce->flags);
+}
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346..2d07b821a20e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -95,6 +95,8 @@ struct intel_context {
 #define CONTEXT_BANNED			6
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
 #define CONTEXT_NOPREEMPT		8
+#define CONTEXT_SCHEDULABLE		9  /* Unless banned or non-persistent closed. */
+#define CONTEXT_NON_PERSISTENT		10 /* Only if also closed. */
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de124870af44..3f3e2ee6a24d 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		     !intel_engine_has_heartbeat(engine)))
 		intel_context_set_banned(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -1204,12 +1204,19 @@ static void record_preemption(struct intel_engine_execlists *execlists)
 static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 					    const struct i915_request *rq)
 {
+	struct intel_context *ce;
+
 	if (!rq)
 		return 0;
 
+	ce = rq->context;
+
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
-	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
+	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
 		return 1;
+	/* Longer grace for closed non-persistent contexts to avoid resets. */
+	else if (unlikely(intel_context_is_closed_non_persistent(ce)))
+		return 15;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index bec9c3652188..bbac87535923 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
-- 
2.30.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
  2021-04-30 13:27     ` Tvrtko Ursulin
@ 2021-04-30 14:34       ` Daniel Vetter
  -1 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2021-04-30 14:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Chris Wilson, dri-devel, Zhen Han

On Fri, Apr 30, 2021 at 3:27 PM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
> On 30/04/2021 12:48, Daniel Vetter wrote:
> > On Thu, Apr 29, 2021 at 10:46:40AM +0100, Tvrtko Ursulin wrote:
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> When a non-persistent context exits we currently mark it as banned in
> >> order to trigger fast termination of any outstanding GPU jobs it may have
> >> left running.
> >>
> >> In doing so we apply a very strict 1ms limit in which the left over job
> >> has to preempt before we issues an engine resets.
> >>
> >> Some workloads are not able to cleanly preempt in that time window and it
> >> can be argued that it would instead be better to give them a bit more
> >> grace since avoiding engine resets is generally preferrable.
> >
> > Can you pls explain here why this is preferrable?
>
> I think there's always the risk of an innocent request getting axed with
> preempt to busy being very asynchronous and also engine reset can
> sometimes fail as well.
>
> >> To achieve this the patch splits handling of banned contexts from simply
> >> exited non-persistent ones and then applies different timeouts for both
> >> and also extends the criteria which determines if a request should be
> >> scheduled back in after preemption or not.
> >>
> >> 15ms preempt timeout grace is given to exited non-persistent contexts
> >> which have been empirically tested to satisfy customers requirements
> >> and still provides reasonably quick cleanup post exit.
> >
> > Same here, a bit more detail on what exactly was the problem to be fixed
> > is needed.
>
> It is a bit multi-faceted. Start with how in some cultures errors
> messages are much bigger error flags than in others and much more
> difficult to hand-wave "oh that's not a problem really". The the
> previous considerations about why not avoid engine reset if we can. Add
> on top how non-persistent context exiting is not really an error worthy
> of a reset, *if* it exits cleanly reasonably quickly.
>
> You could say make it clean up for itself before it exits, not a kernel
> problem. But on the balance of everything, to me it sounds saleable to
> just give it some longer time compared to banned contexts, which are the
> unquestionably naughty/dangerous ones. Also, how fast non-persistent
> contexts will be cleaned up hasn't been defined really. As long as 15ms
> is an order of magnitude, plus some, shorter than the normal preempt
> timeout I think it is fine.

Yeah I think makes all sense, I just asked since the commit message
hinted at reasons but didn't explain any of them. I also assume that
generally the full reset flow is also on the order of 1ms (haven't
checked, but it's not cheap), so not being aggressive with engaging
that hammer is imo itself a very good reason to be a notch more lax
here.
-Danie

>
> Regards,
>
> Tvrtko
>
> P.S. Otherwise I plan to respin v2 with consolidated CONTEXT_SCHEDULABLE
> flag so fast paths do not have to do too many individual checks.
>
> > -Daniel
> >
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Zhen Han <zhen.han@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
> >>   drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
> >>   drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
> >>   .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
> >>   drivers/gpu/drm/i915/i915_request.c             |  2 +-
> >>   5 files changed, 38 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> index fd8ee52e17a4..5a6eba1232cd 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> @@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
> >>      return engine;
> >>   }
> >>
> >> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >> +static void
> >> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
> >>   {
> >>      struct i915_gem_engines_iter it;
> >>      struct intel_context *ce;
> >> @@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >>
> >>              if (ban && intel_context_set_banned(ce))
> >>                      continue;
> >> +            else if (!persistent && intel_context_set_non_persistent(ce))
> >> +                    continue;
> >>
> >>              /*
> >>               * Check the current active state of this context; if we
> >> @@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >>              engine = active_engine(ce);
> >>
> >>              /* First attempt to gracefully cancel the context */
> >> -            if (engine && !__cancel_engine(engine) && ban)
> >> +            if (engine && !__cancel_engine(engine) && (ban || !persistent))
> >>                      /*
> >>                       * If we are unable to send a preemptive pulse to bump
> >>                       * the context from the GPU, we have to resort to a full
> >> @@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >>
> >>   static void kill_context(struct i915_gem_context *ctx)
> >>   {
> >> -    bool ban = (!i915_gem_context_is_persistent(ctx) ||
> >> -                !ctx->i915->params.enable_hangcheck);
> >>      struct i915_gem_engines *pos, *next;
> >>
> >>      spin_lock_irq(&ctx->stale.lock);
> >> @@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
> >>
> >>              spin_unlock_irq(&ctx->stale.lock);
> >>
> >> -            kill_engines(pos, ban);
> >> +            kill_engines(pos, !ctx->i915->params.enable_hangcheck,
> >> +                         i915_gem_context_is_persistent(ctx));
> >>
> >>              spin_lock_irq(&ctx->stale.lock);
> >>              GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
> >> @@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
> >>
> >>   kill:
> >>      if (list_empty(&engines->link)) /* raced, already closed */
> >> -            kill_engines(engines, true);
> >> +            kill_engines(engines, true,
> >> +                         i915_gem_context_is_persistent(ctx));
> >>
> >>      i915_sw_fence_commit(&engines->fence);
> >>   }
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> >> index f83a73a2b39f..b0e812b8ce39 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> >> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> >> @@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
> >>      return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
> >>   }
> >>
> >> +static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
> >> +{
> >> +    return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> >> +}
> >> +
> >> +static inline bool intel_context_set_non_persistent(struct intel_context *ce)
> >> +{
> >> +    return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> >> +}
> >> +
> >> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
> >> +{
> >> +    return !intel_context_is_banned(ce) &&
> >> +           !(intel_context_is_closed(ce) &&
> >> +             intel_context_is_non_persistent(ce));
> >> +}
> >> +
> >>   static inline bool
> >>   intel_context_force_single_submission(const struct intel_context *ce)
> >>   {
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> >> index ed8c447a7346..aa949c539e93 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> >> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> >> @@ -95,6 +95,7 @@ struct intel_context {
> >>   #define CONTEXT_BANNED                     6
> >>   #define CONTEXT_FORCE_SINGLE_SUBMISSION    7
> >>   #define CONTEXT_NOPREEMPT          8
> >> +#define CONTEXT_NON_PERSISTENT              9 /* Only if also closed. */
> >>
> >>      struct {
> >>              u64 timeout_us;
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> index de124870af44..5ad7272fbbc4 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
> >>                   !intel_engine_has_heartbeat(engine)))
> >>              intel_context_set_banned(ce);
> >>
> >> -    if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> >> +    if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
> >>              reset_active(rq, engine);
> >>
> >>      if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> >> @@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
> >>   static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
> >>                                          const struct i915_request *rq)
> >>   {
> >> +    struct intel_context *ce;
> >> +
> >>      if (!rq)
> >>              return 0;
> >>
> >> +    ce = rq->context;
> >> +
> >>      /* Force a fast reset for terminated contexts (ignoring sysfs!) */
> >> -    if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
> >> +    if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> >>              return 1;
> >> +    /* Longer grace for closed non-persistent contexts to avoid resets. */
> >> +    else if (unlikely(intel_context_is_closed(ce) &&
> >> +                      intel_context_is_non_persistent(ce)))
> >> +            return 15;
> >>
> >>      return READ_ONCE(engine->props.preempt_timeout_ms);
> >>   }
> >> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> >> index bec9c3652188..bbac87535923 100644
> >> --- a/drivers/gpu/drm/i915/i915_request.c
> >> +++ b/drivers/gpu/drm/i915/i915_request.c
> >> @@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
> >>              goto active;
> >>      }
> >>
> >> -    if (unlikely(intel_context_is_banned(request->context)))
> >> +    if (unlikely(!intel_context_is_schedulable(request->context)))
> >>              i915_request_set_error_once(request, -EIO);
> >>
> >>      if (unlikely(fatal_error(request->fence.error)))
> >> --
> >> 2.30.2
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Be more gentle with exiting non-persistent context
@ 2021-04-30 14:34       ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2021-04-30 14:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Chris Wilson, dri-devel, Zhen Han

On Fri, Apr 30, 2021 at 3:27 PM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
> On 30/04/2021 12:48, Daniel Vetter wrote:
> > On Thu, Apr 29, 2021 at 10:46:40AM +0100, Tvrtko Ursulin wrote:
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> When a non-persistent context exits we currently mark it as banned in
> >> order to trigger fast termination of any outstanding GPU jobs it may have
> >> left running.
> >>
> >> In doing so we apply a very strict 1ms limit in which the left over job
> >> has to preempt before we issues an engine resets.
> >>
> >> Some workloads are not able to cleanly preempt in that time window and it
> >> can be argued that it would instead be better to give them a bit more
> >> grace since avoiding engine resets is generally preferrable.
> >
> > Can you pls explain here why this is preferrable?
>
> I think there's always the risk of an innocent request getting axed with
> preempt to busy being very asynchronous and also engine reset can
> sometimes fail as well.
>
> >> To achieve this the patch splits handling of banned contexts from simply
> >> exited non-persistent ones and then applies different timeouts for both
> >> and also extends the criteria which determines if a request should be
> >> scheduled back in after preemption or not.
> >>
> >> 15ms preempt timeout grace is given to exited non-persistent contexts
> >> which have been empirically tested to satisfy customers requirements
> >> and still provides reasonably quick cleanup post exit.
> >
> > Same here, a bit more detail on what exactly was the problem to be fixed
> > is needed.
>
> It is a bit multi-faceted. Start with how in some cultures errors
> messages are much bigger error flags than in others and much more
> difficult to hand-wave "oh that's not a problem really". The the
> previous considerations about why not avoid engine reset if we can. Add
> on top how non-persistent context exiting is not really an error worthy
> of a reset, *if* it exits cleanly reasonably quickly.
>
> You could say make it clean up for itself before it exits, not a kernel
> problem. But on the balance of everything, to me it sounds saleable to
> just give it some longer time compared to banned contexts, which are the
> unquestionably naughty/dangerous ones. Also, how fast non-persistent
> contexts will be cleaned up hasn't been defined really. As long as 15ms
> is an order of magnitude, plus some, shorter than the normal preempt
> timeout I think it is fine.

Yeah I think makes all sense, I just asked since the commit message
hinted at reasons but didn't explain any of them. I also assume that
generally the full reset flow is also on the order of 1ms (haven't
checked, but it's not cheap), so not being aggressive with engaging
that hammer is imo itself a very good reason to be a notch more lax
here.
-Danie

>
> Regards,
>
> Tvrtko
>
> P.S. Otherwise I plan to respin v2 with consolidated CONTEXT_SCHEDULABLE
> flag so fast paths do not have to do too many individual checks.
>
> > -Daniel
> >
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Zhen Han <zhen.han@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/gem/i915_gem_context.c     | 15 +++++++++------
> >>   drivers/gpu/drm/i915/gt/intel_context.h         | 17 +++++++++++++++++
> >>   drivers/gpu/drm/i915/gt/intel_context_types.h   |  1 +
> >>   .../drm/i915/gt/intel_execlists_submission.c    | 12 ++++++++++--
> >>   drivers/gpu/drm/i915/i915_request.c             |  2 +-
> >>   5 files changed, 38 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> index fd8ee52e17a4..5a6eba1232cd 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> >> @@ -426,7 +426,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
> >>      return engine;
> >>   }
> >>
> >> -static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >> +static void
> >> +kill_engines(struct i915_gem_engines *engines, bool ban, bool persistent)
> >>   {
> >>      struct i915_gem_engines_iter it;
> >>      struct intel_context *ce;
> >> @@ -443,6 +444,8 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >>
> >>              if (ban && intel_context_set_banned(ce))
> >>                      continue;
> >> +            else if (!persistent && intel_context_set_non_persistent(ce))
> >> +                    continue;
> >>
> >>              /*
> >>               * Check the current active state of this context; if we
> >> @@ -454,7 +457,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >>              engine = active_engine(ce);
> >>
> >>              /* First attempt to gracefully cancel the context */
> >> -            if (engine && !__cancel_engine(engine) && ban)
> >> +            if (engine && !__cancel_engine(engine) && (ban || !persistent))
> >>                      /*
> >>                       * If we are unable to send a preemptive pulse to bump
> >>                       * the context from the GPU, we have to resort to a full
> >> @@ -466,8 +469,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
> >>
> >>   static void kill_context(struct i915_gem_context *ctx)
> >>   {
> >> -    bool ban = (!i915_gem_context_is_persistent(ctx) ||
> >> -                !ctx->i915->params.enable_hangcheck);
> >>      struct i915_gem_engines *pos, *next;
> >>
> >>      spin_lock_irq(&ctx->stale.lock);
> >> @@ -480,7 +481,8 @@ static void kill_context(struct i915_gem_context *ctx)
> >>
> >>              spin_unlock_irq(&ctx->stale.lock);
> >>
> >> -            kill_engines(pos, ban);
> >> +            kill_engines(pos, !ctx->i915->params.enable_hangcheck,
> >> +                         i915_gem_context_is_persistent(ctx));
> >>
> >>              spin_lock_irq(&ctx->stale.lock);
> >>              GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
> >> @@ -526,7 +528,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
> >>
> >>   kill:
> >>      if (list_empty(&engines->link)) /* raced, already closed */
> >> -            kill_engines(engines, true);
> >> +            kill_engines(engines, true,
> >> +                         i915_gem_context_is_persistent(ctx));
> >>
> >>      i915_sw_fence_commit(&engines->fence);
> >>   }
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> >> index f83a73a2b39f..b0e812b8ce39 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> >> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> >> @@ -220,6 +220,23 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
> >>      return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
> >>   }
> >>
> >> +static inline bool intel_context_is_non_persistent(const struct intel_context *ce)
> >> +{
> >> +    return test_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> >> +}
> >> +
> >> +static inline bool intel_context_set_non_persistent(struct intel_context *ce)
> >> +{
> >> +    return test_and_set_bit(CONTEXT_NON_PERSISTENT, &ce->flags);
> >> +}
> >> +
> >> +static inline bool intel_context_is_schedulable(const struct intel_context *ce)
> >> +{
> >> +    return !intel_context_is_banned(ce) &&
> >> +           !(intel_context_is_closed(ce) &&
> >> +             intel_context_is_non_persistent(ce));
> >> +}
> >> +
> >>   static inline bool
> >>   intel_context_force_single_submission(const struct intel_context *ce)
> >>   {
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> >> index ed8c447a7346..aa949c539e93 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> >> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> >> @@ -95,6 +95,7 @@ struct intel_context {
> >>   #define CONTEXT_BANNED                     6
> >>   #define CONTEXT_FORCE_SINGLE_SUBMISSION    7
> >>   #define CONTEXT_NOPREEMPT          8
> >> +#define CONTEXT_NON_PERSISTENT              9 /* Only if also closed. */
> >>
> >>      struct {
> >>              u64 timeout_us;
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> index de124870af44..5ad7272fbbc4 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> >> @@ -478,7 +478,7 @@ __execlists_schedule_in(struct i915_request *rq)
> >>                   !intel_engine_has_heartbeat(engine)))
> >>              intel_context_set_banned(ce);
> >>
> >> -    if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> >> +    if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
> >>              reset_active(rq, engine);
> >>
> >>      if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> >> @@ -1204,12 +1204,20 @@ static void record_preemption(struct intel_engine_execlists *execlists)
> >>   static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
> >>                                          const struct i915_request *rq)
> >>   {
> >> +    struct intel_context *ce;
> >> +
> >>      if (!rq)
> >>              return 0;
> >>
> >> +    ce = rq->context;
> >> +
> >>      /* Force a fast reset for terminated contexts (ignoring sysfs!) */
> >> -    if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
> >> +    if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
> >>              return 1;
> >> +    /* Longer grace for closed non-persistent contexts to avoid resets. */
> >> +    else if (unlikely(intel_context_is_closed(ce) &&
> >> +                      intel_context_is_non_persistent(ce)))
> >> +            return 15;
> >>
> >>      return READ_ONCE(engine->props.preempt_timeout_ms);
> >>   }
> >> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> >> index bec9c3652188..bbac87535923 100644
> >> --- a/drivers/gpu/drm/i915/i915_request.c
> >> +++ b/drivers/gpu/drm/i915/i915_request.c
> >> @@ -660,7 +660,7 @@ bool __i915_request_submit(struct i915_request *request)
> >>              goto active;
> >>      }
> >>
> >> -    if (unlikely(intel_context_is_banned(request->context)))
> >> +    if (unlikely(!intel_context_is_schedulable(request->context)))
> >>              i915_request_set_error_once(request, -EIO);
> >>
> >>      if (unlikely(fatal_error(request->fence.error)))
> >> --
> >> 2.30.2
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Be more gentle with exiting non-persistent context (rev2)
  2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  (?)
@ 2021-05-03 12:34 ` Patchwork
  -1 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2021-05-03 12:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 12685 bytes --]

== Series Details ==

Series: drm/i915: Be more gentle with exiting non-persistent context (rev2)
URL   : https://patchwork.freedesktop.org/series/89644/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10036 -> Patchwork_20045
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_20045 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_20045, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_20045:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@execlists:
    - fi-cfl-8109u:       [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-cfl-8109u/igt@i915_selftest@live@execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cfl-8109u/igt@i915_selftest@live@execlists.html
    - fi-glk-dsi:         [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-glk-dsi/igt@i915_selftest@live@execlists.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-glk-dsi/igt@i915_selftest@live@execlists.html
    - fi-cfl-8700k:       [PASS][5] -> [INCOMPLETE][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-cfl-8700k/igt@i915_selftest@live@execlists.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cfl-8700k/igt@i915_selftest@live@execlists.html
    - fi-tgl-u2:          [PASS][7] -> [INCOMPLETE][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-tgl-u2/igt@i915_selftest@live@execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-tgl-u2/igt@i915_selftest@live@execlists.html
    - fi-cfl-guc:         [PASS][9] -> [INCOMPLETE][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-cfl-guc/igt@i915_selftest@live@execlists.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cfl-guc/igt@i915_selftest@live@execlists.html
    - fi-bxt-dsi:         [PASS][11] -> [INCOMPLETE][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-bxt-dsi/igt@i915_selftest@live@execlists.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bxt-dsi/igt@i915_selftest@live@execlists.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live@execlists:
    - {fi-tgl-1115g4}:    [PASS][13] -> [INCOMPLETE][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-tgl-1115g4/igt@i915_selftest@live@execlists.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-tgl-1115g4/igt@i915_selftest@live@execlists.html
    - {fi-ehl-1}:         [PASS][15] -> [INCOMPLETE][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-ehl-1/igt@i915_selftest@live@execlists.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-ehl-1/igt@i915_selftest@live@execlists.html
    - {fi-tgl-dsi}:       [PASS][17] -> [INCOMPLETE][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-tgl-dsi/igt@i915_selftest@live@execlists.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-tgl-dsi/igt@i915_selftest@live@execlists.html

  * igt@runner@aborted:
    - {fi-rkl-11500t}:    NOTRUN -> [FAIL][19]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-rkl-11500t/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_20045 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@execlists:
    - fi-kbl-r:           [PASS][20] -> [INCOMPLETE][21] ([i915#2782] / [i915#794])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-kbl-r/igt@i915_selftest@live@execlists.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-r/igt@i915_selftest@live@execlists.html
    - fi-bsw-nick:        [PASS][22] -> [INCOMPLETE][23] ([i915#2782] / [i915#2940])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-bsw-nick/igt@i915_selftest@live@execlists.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bsw-nick/igt@i915_selftest@live@execlists.html
    - fi-kbl-x1275:       [PASS][24] -> [INCOMPLETE][25] ([i915#2782] / [i915#794])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-kbl-x1275/igt@i915_selftest@live@execlists.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-x1275/igt@i915_selftest@live@execlists.html
    - fi-icl-u2:          [PASS][26] -> [INCOMPLETE][27] ([i915#2782])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-icl-u2/igt@i915_selftest@live@execlists.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-icl-u2/igt@i915_selftest@live@execlists.html
    - fi-skl-6600u:       [PASS][28] -> [INCOMPLETE][29] ([i915#2782])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-skl-6600u/igt@i915_selftest@live@execlists.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-skl-6600u/igt@i915_selftest@live@execlists.html
    - fi-bsw-kefka:       [PASS][30] -> [INCOMPLETE][31] ([i915#2782] / [i915#2940])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-bsw-kefka/igt@i915_selftest@live@execlists.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bsw-kefka/igt@i915_selftest@live@execlists.html
    - fi-kbl-7567u:       [PASS][32] -> [INCOMPLETE][33] ([i915#2782] / [i915#794])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-kbl-7567u/igt@i915_selftest@live@execlists.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-7567u/igt@i915_selftest@live@execlists.html
    - fi-cml-s:           [PASS][34] -> [INCOMPLETE][35] ([i915#3233])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-cml-s/igt@i915_selftest@live@execlists.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cml-s/igt@i915_selftest@live@execlists.html
    - fi-cml-u2:          [PASS][36] -> [INCOMPLETE][37] ([i915#3233])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-cml-u2/igt@i915_selftest@live@execlists.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cml-u2/igt@i915_selftest@live@execlists.html
    - fi-kbl-guc:         [PASS][38] -> [INCOMPLETE][39] ([i915#2782] / [i915#794])
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-kbl-guc/igt@i915_selftest@live@execlists.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-guc/igt@i915_selftest@live@execlists.html
    - fi-kbl-7500u:       [PASS][40] -> [INCOMPLETE][41] ([i915#2782] / [i915#794])
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-kbl-7500u/igt@i915_selftest@live@execlists.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-7500u/igt@i915_selftest@live@execlists.html

  * igt@i915_selftest@live@hangcheck:
    - fi-snb-2600:        [PASS][42] -> [INCOMPLETE][43] ([i915#2782])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10036/fi-snb-2600/igt@i915_selftest@live@hangcheck.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-snb-2600/igt@i915_selftest@live@hangcheck.html

  * igt@runner@aborted:
    - fi-kbl-x1275:       NOTRUN -> [FAIL][44] ([i915#1436] / [i915#3363])
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-x1275/igt@runner@aborted.html
    - fi-bsw-kefka:       NOTRUN -> [FAIL][45] ([i915#1436])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bsw-kefka/igt@runner@aborted.html
    - fi-cfl-8700k:       NOTRUN -> [FAIL][46] ([i915#3363])
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cfl-8700k/igt@runner@aborted.html
    - fi-skl-6600u:       NOTRUN -> [FAIL][47] ([i915#1436] / [i915#3363])
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-skl-6600u/igt@runner@aborted.html
    - fi-cfl-8109u:       NOTRUN -> [FAIL][48] ([i915#3363])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cfl-8109u/igt@runner@aborted.html
    - fi-icl-u2:          NOTRUN -> [FAIL][49] ([i915#2782] / [i915#3363])
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-icl-u2/igt@runner@aborted.html
    - fi-glk-dsi:         NOTRUN -> [FAIL][50] ([i915#3363] / [k.org#202321])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-glk-dsi/igt@runner@aborted.html
    - fi-bsw-nick:        NOTRUN -> [FAIL][51] ([i915#1436])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bsw-nick/igt@runner@aborted.html
    - fi-kbl-r:           NOTRUN -> [FAIL][52] ([i915#1436] / [i915#3363])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-r/igt@runner@aborted.html
    - fi-bdw-5557u:       NOTRUN -> [FAIL][53] ([i915#1602] / [i915#2029])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bdw-5557u/igt@runner@aborted.html
    - fi-kbl-7500u:       NOTRUN -> [FAIL][54] ([i915#1436] / [i915#3363])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-7500u/igt@runner@aborted.html
    - fi-kbl-guc:         NOTRUN -> [FAIL][55] ([i915#1436] / [i915#3363])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-guc/igt@runner@aborted.html
    - fi-cml-u2:          NOTRUN -> [FAIL][56] ([i915#3363])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cml-u2/igt@runner@aborted.html
    - fi-bxt-dsi:         NOTRUN -> [FAIL][57] ([i915#3363])
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-bxt-dsi/igt@runner@aborted.html
    - fi-cml-s:           NOTRUN -> [FAIL][58] ([i915#3363])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cml-s/igt@runner@aborted.html
    - fi-cfl-guc:         NOTRUN -> [FAIL][59] ([i915#3363])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-cfl-guc/igt@runner@aborted.html
    - fi-kbl-7567u:       NOTRUN -> [FAIL][60] ([i915#1436] / [i915#3363])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-kbl-7567u/igt@runner@aborted.html
    - fi-tgl-u2:          NOTRUN -> [FAIL][61] ([i915#1436] / [i915#2966])
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/fi-tgl-u2/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#1222]: https://gitlab.freedesktop.org/drm/intel/issues/1222
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1602]: https://gitlab.freedesktop.org/drm/intel/issues/1602
  [i915#2029]: https://gitlab.freedesktop.org/drm/intel/issues/2029
  [i915#2505]: https://gitlab.freedesktop.org/drm/intel/issues/2505
  [i915#2782]: https://gitlab.freedesktop.org/drm/intel/issues/2782
  [i915#2940]: https://gitlab.freedesktop.org/drm/intel/issues/2940
  [i915#2966]: https://gitlab.freedesktop.org/drm/intel/issues/2966
  [i915#3233]: https://gitlab.freedesktop.org/drm/intel/issues/3233
  [i915#3363]: https://gitlab.freedesktop.org/drm/intel/issues/3363
  [i915#794]: https://gitlab.freedesktop.org/drm/intel/issues/794
  [k.org#202321]: https://bugzilla.kernel.org/show_bug.cgi?id=202321
  [k.org#205379]: https://bugzilla.kernel.org/show_bug.cgi?id=205379


Participating hosts (46 -> 39)
------------------------------

  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-tgl-y fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_10036 -> Patchwork_20045

  CI-20190529: 20190529
  CI_DRM_10036: 08705ab833ebfc80bc683cb5269d04944bcf8d20 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6077: 126a3f6fc0e97786e2819085efc84e741093aed5 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_20045: d91353edfe76772b58adc310d39aa897f07f8a4f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

d91353edfe76 drm/i915: Be more gentle with exiting non-persistent context

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20045/index.html

[-- Attachment #1.2: Type: text/html, Size: 16148 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-05-03 12:35 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-29  9:46 [PATCH] drm/i915: Be more gentle with exiting non-persistent context Tvrtko Ursulin
2021-04-29  9:46 ` [Intel-gfx] " Tvrtko Ursulin
2021-04-29 15:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2021-04-29 15:43 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-04-29 18:41 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2021-04-30 11:48 ` [Intel-gfx] [PATCH] " Daniel Vetter
2021-04-30 11:48   ` Daniel Vetter
2021-04-30 13:27   ` Tvrtko Ursulin
2021-04-30 13:27     ` Tvrtko Ursulin
2021-04-30 14:34     ` Daniel Vetter
2021-04-30 14:34       ` Daniel Vetter
2021-04-30 14:10 ` [PATCH v2] " Tvrtko Ursulin
2021-04-30 14:10   ` [Intel-gfx] " Tvrtko Ursulin
2021-05-03 12:34 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Be more gentle with exiting non-persistent context (rev2) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.