* [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
@ 2020-03-26 23:18 Chris Wilson
2020-03-26 23:18 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Chris Wilson @ 2020-03-26 23:18 UTC (permalink / raw)
To: intel-gfx
In what seems remarkably similar to the w/a required to not reload an
idle context with HEAD==TAIL, it appears we must prevent the HW from
switching to an idle context in ELSP[1], while simultaneously trying to
preempt the HW to run another context and a continuation of the idle
context (which is no longer idle).
process_csb: vecs0: cs-irq head=0, tail=1
process_csb: vecs0: csb[1]: status=0x00000882:0x00000020
trace_ports: vecs0: preempted { 8c0:30!, 0:0 }
trace_ports: vecs0: promote { 8b2:32!, 8c0:30 }
trace_ports: vecs0: submit { 8b8:32, 8c0:32 }
process_csb: vecs0: cs-irq head=1, tail=2
process_csb: vecs0: csb[2]: status=0x00000814:0x00000040
trace_ports: vecs0: completed { 8b2:32!, 8c0:30 }
process_csb: vecs0: cs-irq head=2, tail=5
process_csb: vecs0: csb[3]: status=0x00000812:0x00000020
trace_ports: vecs0: preempted { 8c0:30!, 0:0 }
trace_ports: vecs0: promote { 8b8:32!, 8c0:32 }
process_csb: vecs0: csb[4]: status=0x00000814:0x00000060
trace_ports: vecs0: completed { 8b8:32!, 8c0:32 }
process_csb: vecs0: csb[5]: status=0x00000818:0x00000020
trace_ports: vecs0: completed { 8c0:32, 0:0 }
process_csb: vecs0: ring:{start:0x00021000, head:03f8, tail:03f8, ctl:00000000, mode:00000200}
process_csb: vecs0: rq:{start:00021000, head:03c0, tail:0400, seqno:8c0:32, hwsp:30},
process_csb: vecs0: ctx:{start:00021000, head:03f8, tail:03f8},
process_csb: GEM_BUG_ON("context completed before request")
Fortunately, we just so happen to have a semaphore in place to prevent
the ring HEAD from proceeding past the end of a request that we can use
to fix the HEAD in position as we reprogram ELSP.
Closes: https://gitlab.freedesktop.org/drm/intel/issues/1501
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 72 ++++++++++++++---------------
1 file changed, 36 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b12355048501..4edda15eba26 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1854,7 +1854,7 @@ static inline void clear_ports(struct i915_request **ports, int count)
memset_p((void **)ports, NULL, count);
}
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
@@ -1928,13 +1928,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
execlists->queue_priority_hint);
record_preemption(execlists);
- /*
- * Don't let the RING_HEAD advance past the breadcrumb
- * as we unwind (and until we resubmit) so that we do
- * not accidentally tell it to go backwards.
- */
- ring_set_paused(engine, 1);
-
/*
* Note that we have not stopped the GPU at this point,
* so we are unwinding the incomplete requests as they
@@ -1954,7 +1947,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last->sched.attr.priority,
execlists->queue_priority_hint);
- ring_set_paused(engine, 1);
defer_active(engine);
/*
@@ -1988,7 +1980,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* of timeslices, our queue might be.
*/
start_timeslice(engine);
- return;
+ return false;
}
}
}
@@ -2021,9 +2013,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
if (last && !can_merge_rq(last, rq)) {
+ /* leave this for another sibling */
spin_unlock(&ve->base.active.lock);
start_timeslice(engine);
- return; /* leave this for another sibling */
+ return false;
}
ENGINE_TRACE(engine,
@@ -2193,32 +2186,31 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
+ if (!submit)
+ return false;
- if (submit) {
- *port = execlists_schedule_in(last, port - execlists->pending);
- execlists->switch_priority_hint =
- switch_prio(engine, *execlists->pending);
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ execlists->switch_priority_hint =
+ switch_prio(engine, *execlists->pending);
- /*
- * Skip if we ended up with exactly the same set of requests,
- * e.g. trying to timeslice a pair of ordered contexts
- */
- if (!memcmp(active, execlists->pending,
- (port - execlists->pending + 1) * sizeof(*port))) {
- do
- execlists_schedule_out(fetch_and_zero(port));
- while (port-- != execlists->pending);
-
- goto skip_submit;
- }
- clear_ports(port + 1, last_port - port);
+ /*
+ * Skip if we ended up with exactly the same set of requests,
+ * e.g. trying to timeslice a pair of ordered contexts
+ */
+ if (!memcmp(active, execlists->pending,
+ (port - execlists->pending + 1) * sizeof(*port))) {
+ do
+ execlists_schedule_out(fetch_and_zero(port));
+ while (port-- != execlists->pending);
- execlists_submit_ports(engine);
- set_preempt_timeout(engine, *active);
- } else {
-skip_submit:
- ring_set_paused(engine, 0);
+ return false;
}
+ clear_ports(port + 1, last_port - port);
+
+ execlists_submit_ports(engine);
+ set_preempt_timeout(engine, *active);
+ tasklet_hi_schedule(&execlists->tasklet);
+ return true;
}
static void
@@ -2478,7 +2470,16 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
lockdep_assert_held(&engine->active.lock);
if (!READ_ONCE(engine->execlists.pending[0])) {
rcu_read_lock(); /* protect peeking at execlists->active */
- execlists_dequeue(engine);
+
+ /*
+ * Don't let the RING_HEAD advance past the breadcrumb
+ * as we unwind (and until we resubmit) so that we do
+ * not accidentally tell it to go backwards.
+ */
+ ring_set_paused(engine, 1);
+ if (!execlists_dequeue(engine))
+ ring_set_paused(engine, 0);
+
rcu_read_unlock();
}
}
@@ -2816,8 +2817,7 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
ring_set_paused(engine, 1); /* Freeze the current request in place */
if (execlists_capture(engine))
intel_engine_reset(engine, msg);
- else
- ring_set_paused(engine, 0);
+ ring_set_paused(engine, 0);
tasklet_enable(&engine->execlists.tasklet);
clear_and_wake_up_bit(bit, lock);
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime
2020-03-26 23:18 [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Chris Wilson
@ 2020-03-26 23:18 ` Chris Wilson
2020-03-27 16:01 ` Tvrtko Ursulin
2020-03-27 0:15 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Patchwork
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2020-03-26 23:18 UTC (permalink / raw)
To: intel-gfx
Upon a GPU reset, we copy the default context image over the top of the
guilty image. This will roll back the CTX_TIMESTAMP register to before
our value of ce->runtime.last. Reset both back to 0 so that we do not
encounter an underflow on the next schedule out after resume.
This should not be a huge issue in practice, as hangs should be rare in
correct code.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4edda15eba26..47cec545a069 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -238,6 +238,17 @@ __execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
@@ -1154,6 +1165,7 @@ static void restore_default_state(struct intel_context *ce,
engine->context_size - PAGE_SIZE);
execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+ ce->runtime.last = intel_context_get_runtime(ce);
}
static void reset_active(struct i915_request *rq,
@@ -1195,17 +1207,6 @@ static void reset_active(struct i915_request *rq,
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
- /*
- * We can use either ppHWSP[16] which is recorded before the context
- * switch (and so excludes the cost of context switches) or use the
- * value from the context image itself, which is saved/restored earlier
- * and so includes the cost of the save.
- */
- return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -4581,6 +4582,7 @@ static void init_common_reg_state(u32 * const regs,
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_TIMESTAMP] = 0;
}
static void init_wa_bb_reg_state(u32 * const regs,
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime
2020-03-26 23:18 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
@ 2020-03-27 16:01 ` Tvrtko Ursulin
0 siblings, 0 replies; 7+ messages in thread
From: Tvrtko Ursulin @ 2020-03-27 16:01 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 26/03/2020 23:18, Chris Wilson wrote:
> Upon a GPU reset, we copy the default context image over top of the
> guilty image. This will rollback the CTX_TIMESTAMP register to before
> our value of ce->runtime.last. Reset both back to 0 so that we do not
> encounter an underflow on the next schedule out after resume.
>
> This should not be a huge issue in practice, as hangs should be rare in
> correct code.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 24 +++++++++++++-----------
> 1 file changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 4edda15eba26..47cec545a069 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -238,6 +238,17 @@ __execlists_update_reg_state(const struct intel_context *ce,
> const struct intel_engine_cs *engine,
> u32 head);
>
> +static u32 intel_context_get_runtime(const struct intel_context *ce)
> +{
> + /*
> + * We can use either ppHWSP[16] which is recorded before the context
> + * switch (and so excludes the cost of context switches) or use the
> + * value from the context image itself, which is saved/restored earlier
> + * and so includes the cost of the save.
> + */
> + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
> +}
> +
> static void mark_eio(struct i915_request *rq)
> {
> if (i915_request_completed(rq))
> @@ -1154,6 +1165,7 @@ static void restore_default_state(struct intel_context *ce,
> engine->context_size - PAGE_SIZE);
>
> execlists_init_reg_state(regs, ce, engine, ce->ring, false);
> + ce->runtime.last = intel_context_get_runtime(ce);
> }
>
> static void reset_active(struct i915_request *rq,
> @@ -1195,17 +1207,6 @@ static void reset_active(struct i915_request *rq,
> ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> }
>
> -static u32 intel_context_get_runtime(const struct intel_context *ce)
> -{
> - /*
> - * We can use either ppHWSP[16] which is recorded before the context
> - * switch (and so excludes the cost of context switches) or use the
> - * value from the context image itself, which is saved/restored earlier
> - * and so includes the cost of the save.
> - */
> - return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
> -}
> -
> static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
> {
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> @@ -4581,6 +4582,7 @@ static void init_common_reg_state(u32 * const regs,
> regs[CTX_CONTEXT_CONTROL] = ctl;
>
> regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
> + regs[CTX_TIMESTAMP] = 0;
> }
>
> static void init_wa_bb_reg_state(u32 * const regs,
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
2020-03-26 23:18 [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Chris Wilson
2020-03-26 23:18 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
@ 2020-03-27 0:15 ` Patchwork
2020-03-27 0:52 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-03-27 13:59 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2020-03-27 0:15 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
URL : https://patchwork.freedesktop.org/series/75138/
State : warning
== Summary ==
$ dim checkpatch origin/drm-tip
d6626ef65d6d drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
-:29: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#29:
process_csb: vecs0: ring:{start:0x00021000, head:03f8, tail:03f8, ctl:00000000, mode:00000200}
total: 0 errors, 1 warnings, 0 checks, 126 lines checked
185b85c7368a drm/i915/execlists: Explicitly reset both reg and context runtime
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
2020-03-26 23:18 [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Chris Wilson
2020-03-26 23:18 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
2020-03-27 0:15 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Patchwork
@ 2020-03-27 0:52 ` Patchwork
2020-03-27 13:59 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2020-03-27 0:52 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
URL : https://patchwork.freedesktop.org/series/75138/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_8197 -> Patchwork_17106
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/index.html
Known issues
------------
Here are the changes found in Patchwork_17106 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_selftest@live@execlists:
- fi-apl-guc: [PASS][1] -> [INCOMPLETE][2] ([fdo#103927])
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/fi-apl-guc/igt@i915_selftest@live@execlists.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/fi-apl-guc/igt@i915_selftest@live@execlists.html
- fi-icl-dsi: [PASS][3] -> [INCOMPLETE][4] ([i915#140])
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/fi-icl-dsi/igt@i915_selftest@live@execlists.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/fi-icl-dsi/igt@i915_selftest@live@execlists.html
- fi-cml-s: [PASS][5] -> [INCOMPLETE][6] ([i915#283])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/fi-cml-s/igt@i915_selftest@live@execlists.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/fi-cml-s/igt@i915_selftest@live@execlists.html
* igt@kms_chamelium@dp-crc-fast:
- fi-cml-u2: [PASS][7] -> [FAIL][8] ([i915#262])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/fi-cml-u2/igt@kms_chamelium@dp-crc-fast.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/fi-cml-u2/igt@kms_chamelium@dp-crc-fast.html
#### Possible fixes ####
* igt@i915_selftest@live@late_gt_pm:
- fi-bwr-2160: [INCOMPLETE][9] -> [PASS][10]
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/fi-bwr-2160/igt@i915_selftest@live@late_gt_pm.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/fi-bwr-2160/igt@i915_selftest@live@late_gt_pm.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
[i915#140]: https://gitlab.freedesktop.org/drm/intel/issues/140
[i915#262]: https://gitlab.freedesktop.org/drm/intel/issues/262
[i915#283]: https://gitlab.freedesktop.org/drm/intel/issues/283
[i915#647]: https://gitlab.freedesktop.org/drm/intel/issues/647
[i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656
Participating hosts (44 -> 41)
------------------------------
Additional (2): fi-kbl-r fi-kbl-7500u
Missing (5): fi-hsw-4770r fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bdw-samus
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8197 -> Patchwork_17106
CI-20190529: 20190529
CI_DRM_8197: 198bab1da198b9d6d5c36d52704dd4abab6e81a8 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5539: e7aae12e37771a8b7796ba252574eb832a5839c3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17106: 185b85c7368afa95d0c352b09e7ac161ae19e2bc @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
185b85c7368a drm/i915/execlists: Explicitly reset both reg and context runtime
d6626ef65d6d drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
2020-03-26 23:18 [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Chris Wilson
` (2 preceding siblings ...)
2020-03-27 0:52 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-03-27 13:59 ` Patchwork
3 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2020-03-27 13:59 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
URL : https://patchwork.freedesktop.org/series/75138/
State : failure
== Summary ==
CI Bug Log - changes from CI_DRM_8197_full -> Patchwork_17106_full
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with Patchwork_17106_full absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in Patchwork_17106_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in Patchwork_17106_full:
### IGT changes ###
#### Possible regressions ####
* igt@perf_pmu@busy-idle-no-semaphores-rcs0:
- shard-glk: [PASS][1] -> [FAIL][2]
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-glk5/igt@perf_pmu@busy-idle-no-semaphores-rcs0.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-glk2/igt@perf_pmu@busy-idle-no-semaphores-rcs0.html
#### Warnings ####
* igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy-gup@gtt:
- shard-hsw: [DMESG-WARN][3] ([i915#478]) -> [DMESG-WARN][4]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-hsw8/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy-gup@gtt.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-hsw4/igt@gem_userptr_blits@map-fixed-invalidate-overlap-busy-gup@gtt.html
Known issues
------------
Here are the changes found in Patchwork_17106_full that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_exec_schedule@implicit-read-write-bsd1:
- shard-iclb: [PASS][5] -> [SKIP][6] ([fdo#109276] / [i915#677]) +2 similar issues
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb2/igt@gem_exec_schedule@implicit-read-write-bsd1.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb3/igt@gem_exec_schedule@implicit-read-write-bsd1.html
* igt@gem_exec_schedule@implicit-write-read-bsd:
- shard-iclb: [PASS][7] -> [SKIP][8] ([i915#677])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb7/igt@gem_exec_schedule@implicit-write-read-bsd.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb2/igt@gem_exec_schedule@implicit-write-read-bsd.html
* igt@gem_exec_schedule@preemptive-hang-bsd:
- shard-iclb: [PASS][9] -> [SKIP][10] ([fdo#112146]) +3 similar issues
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb3/igt@gem_exec_schedule@preemptive-hang-bsd.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb4/igt@gem_exec_schedule@preemptive-hang-bsd.html
* igt@gem_workarounds@suspend-resume:
- shard-apl: [PASS][11] -> [DMESG-WARN][12] ([i915#180])
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-apl4/igt@gem_workarounds@suspend-resume.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-apl1/igt@gem_workarounds@suspend-resume.html
* igt@i915_pm_rc6_residency@rc6-idle:
- shard-snb: [PASS][13] -> [TIMEOUT][14] ([i915#1526])
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-snb6/igt@i915_pm_rc6_residency@rc6-idle.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-snb6/igt@i915_pm_rc6_residency@rc6-idle.html
* igt@i915_suspend@forcewake:
- shard-skl: [PASS][15] -> [INCOMPLETE][16] ([i915#69])
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-skl4/igt@i915_suspend@forcewake.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-skl9/igt@i915_suspend@forcewake.html
* igt@kms_color@pipe-b-ctm-blue-to-red:
- shard-skl: [PASS][17] -> [FAIL][18] ([i915#129])
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-skl6/igt@kms_color@pipe-b-ctm-blue-to-red.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-skl3/igt@kms_color@pipe-b-ctm-blue-to-red.html
* igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen:
- shard-kbl: [PASS][19] -> [FAIL][20] ([i915#54] / [i915#93] / [i915#95])
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen.html
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen.html
- shard-apl: [PASS][21] -> [FAIL][22] ([i915#54] / [i915#95])
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-apl1/igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen.html
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-apl4/igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen.html
* igt@kms_draw_crc@draw-method-rgb565-blt-untiled:
- shard-glk: [PASS][23] -> [FAIL][24] ([i915#52] / [i915#54])
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-glk9/igt@kms_draw_crc@draw-method-rgb565-blt-untiled.html
[24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-glk8/igt@kms_draw_crc@draw-method-rgb565-blt-untiled.html
* igt@kms_fbcon_fbt@fbc-suspend:
- shard-apl: [PASS][25] -> [DMESG-WARN][26] ([i915#180] / [i915#95])
[25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-apl2/igt@kms_fbcon_fbt@fbc-suspend.html
[26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-apl4/igt@kms_fbcon_fbt@fbc-suspend.html
* igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render:
- shard-snb: [PASS][27] -> [SKIP][28] ([fdo#109271]) +1 similar issue
[27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-snb5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render.html
[28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-snb1/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render.html
* igt@kms_frontbuffer_tracking@fbc-suspend:
- shard-kbl: [PASS][29] -> [DMESG-WARN][30] ([i915#180] / [i915#93] / [i915#95])
[29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-kbl3/igt@kms_frontbuffer_tracking@fbc-suspend.html
[30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-kbl4/igt@kms_frontbuffer_tracking@fbc-suspend.html
* igt@kms_hdr@bpc-switch-suspend:
- shard-skl: [PASS][31] -> [FAIL][32] ([i915#1188])
[31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-skl9/igt@kms_hdr@bpc-switch-suspend.html
[32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-skl5/igt@kms_hdr@bpc-switch-suspend.html
* igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
- shard-kbl: [PASS][33] -> [DMESG-WARN][34] ([i915#180]) +2 similar issues
[33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-kbl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
[34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-kbl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
* igt@kms_psr@psr2_sprite_plane_move:
- shard-iclb: [PASS][35] -> [SKIP][36] ([fdo#109441]) +3 similar issues
[35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
[36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb3/igt@kms_psr@psr2_sprite_plane_move.html
* igt@perf_pmu@init-busy-vcs1:
- shard-iclb: [PASS][37] -> [SKIP][38] ([fdo#112080]) +3 similar issues
[37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb1/igt@perf_pmu@init-busy-vcs1.html
[38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb6/igt@perf_pmu@init-busy-vcs1.html
* igt@prime_busy@hang-bsd2:
- shard-iclb: [PASS][39] -> [SKIP][40] ([fdo#109276]) +19 similar issues
[39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb1/igt@prime_busy@hang-bsd2.html
[40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb8/igt@prime_busy@hang-bsd2.html
#### Possible fixes ####
* igt@gem_ctx_persistence@close-replace-race:
- shard-tglb: [INCOMPLETE][41] ([i915#1492]) -> [PASS][42]
[41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-tglb8/igt@gem_ctx_persistence@close-replace-race.html
[42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-tglb7/igt@gem_ctx_persistence@close-replace-race.html
* igt@gem_exec_balancer@smoke:
- shard-iclb: [SKIP][43] ([fdo#110854]) -> [PASS][44]
[43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb5/igt@gem_exec_balancer@smoke.html
[44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb1/igt@gem_exec_balancer@smoke.html
* igt@gem_exec_parallel@vcs1-fds:
- shard-iclb: [SKIP][45] ([fdo#112080]) -> [PASS][46] +10 similar issues
[45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb3/igt@gem_exec_parallel@vcs1-fds.html
[46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb4/igt@gem_exec_parallel@vcs1-fds.html
* igt@gem_exec_schedule@implicit-write-read-bsd2:
- shard-iclb: [SKIP][47] ([fdo#109276] / [i915#677]) -> [PASS][48]
[47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb3/igt@gem_exec_schedule@implicit-write-read-bsd2.html
[48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb4/igt@gem_exec_schedule@implicit-write-read-bsd2.html
* igt@gem_exec_schedule@out-order-bsd2:
- shard-iclb: [SKIP][49] ([fdo#109276]) -> [PASS][50] +8 similar issues
[49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb5/igt@gem_exec_schedule@out-order-bsd2.html
[50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb1/igt@gem_exec_schedule@out-order-bsd2.html
* igt@gem_exec_schedule@pi-distinct-iova-bsd:
- shard-iclb: [SKIP][51] ([i915#677]) -> [PASS][52] +3 similar issues
[51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb2/igt@gem_exec_schedule@pi-distinct-iova-bsd.html
[52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb3/igt@gem_exec_schedule@pi-distinct-iova-bsd.html
* igt@gem_exec_schedule@reorder-wide-bsd:
- shard-iclb: [SKIP][53] ([fdo#112146]) -> [PASS][54] +3 similar issues
[53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb1/igt@gem_exec_schedule@reorder-wide-bsd.html
[54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb8/igt@gem_exec_schedule@reorder-wide-bsd.html
* igt@i915_selftest@live@requests:
- shard-tglb: [INCOMPLETE][55] ([i915#1531]) -> [PASS][56]
[55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-tglb7/igt@i915_selftest@live@requests.html
[56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-tglb5/igt@i915_selftest@live@requests.html
* igt@kms_cursor_crc@pipe-a-cursor-suspend:
- shard-kbl: [DMESG-WARN][57] ([i915#180]) -> [PASS][58] +4 similar issues
[57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-kbl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
[58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
* igt@kms_cursor_legacy@flip-vs-cursor-legacy:
- shard-skl: [FAIL][59] ([IGT#5]) -> [PASS][60]
[59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-skl8/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html
[60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-skl2/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html
* igt@kms_dp_dsc@basic-dsc-enable-edp:
- shard-iclb: [SKIP][61] ([fdo#109349]) -> [PASS][62]
[61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb7/igt@kms_dp_dsc@basic-dsc-enable-edp.html
[62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html
* igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
- shard-apl: [DMESG-WARN][63] ([i915#180]) -> [PASS][64] +2 similar issues
[63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-apl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
[64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-apl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
* igt@kms_plane_lowres@pipe-a-tiling-x:
- shard-glk: [FAIL][65] ([i915#899]) -> [PASS][66]
[65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-glk8/igt@kms_plane_lowres@pipe-a-tiling-x.html
[66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-glk6/igt@kms_plane_lowres@pipe-a-tiling-x.html
* igt@kms_psr2_su@frontbuffer:
- shard-iclb: [SKIP][67] ([fdo#109642] / [fdo#111068]) -> [PASS][68]
[67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb3/igt@kms_psr2_su@frontbuffer.html
[68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb2/igt@kms_psr2_su@frontbuffer.html
* igt@kms_psr@psr2_cursor_plane_onoff:
- shard-iclb: [SKIP][69] ([fdo#109441]) -> [PASS][70] +1 similar issue
[69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-iclb7/igt@kms_psr@psr2_cursor_plane_onoff.html
[70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html
#### Warnings ####
* igt@i915_pm_dc@dc6-dpms:
- shard-tglb: [SKIP][71] ([i915#468]) -> [FAIL][72] ([i915#454])
[71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-tglb2/igt@i915_pm_dc@dc6-dpms.html
[72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-tglb1/igt@i915_pm_dc@dc6-dpms.html
* igt@i915_pm_rpm@system-suspend:
- shard-snb: [INCOMPLETE][73] ([i915#82]) -> [SKIP][74] ([fdo#109271])
[73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-snb1/igt@i915_pm_rpm@system-suspend.html
[74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-snb6/igt@i915_pm_rpm@system-suspend.html
* igt@runner@aborted:
- shard-tglb: [FAIL][75] ([i915#1389] / [i915#1485]) -> [FAIL][76] ([i915#1233] / [i915#529])
[75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8197/shard-tglb8/igt@runner@aborted.html
[76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/shard-tglb5/igt@runner@aborted.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[IGT#5]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/5
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
[fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
[fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
[fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
[fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
[fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
[fdo#112080]: https://bugs.freedesktop.org/show_bug.cgi?id=112080
[fdo#112146]: https://bugs.freedesktop.org/show_bug.cgi?id=112146
[i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
[i915#1233]: https://gitlab.freedesktop.org/drm/intel/issues/1233
[i915#129]: https://gitlab.freedesktop.org/drm/intel/issues/129
[i915#1389]: https://gitlab.freedesktop.org/drm/intel/issues/1389
[i915#1485]: https://gitlab.freedesktop.org/drm/intel/issues/1485
[i915#1492]: https://gitlab.freedesktop.org/drm/intel/issues/1492
[i915#1526]: https://gitlab.freedesktop.org/drm/intel/issues/1526
[i915#1531]: https://gitlab.freedesktop.org/drm/intel/issues/1531
[i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
[i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
[i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
[i915#478]: https://gitlab.freedesktop.org/drm/intel/issues/478
[i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
[i915#529]: https://gitlab.freedesktop.org/drm/intel/issues/529
[i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
[i915#677]: https://gitlab.freedesktop.org/drm/intel/issues/677
[i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
[i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
[i915#899]: https://gitlab.freedesktop.org/drm/intel/issues/899
[i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
[i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95
Participating hosts (10 -> 10)
------------------------------
No changes in participating hosts
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8197 -> Patchwork_17106
CI-20190529: 20190529
CI_DRM_8197: 198bab1da198b9d6d5c36d52704dd4abab6e81a8 @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5539: e7aae12e37771a8b7796ba252574eb832a5839c3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17106: 185b85c7368afa95d0c352b09e7ac161ae19e2bc @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17106/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context
@ 2020-03-26 22:35 Chris Wilson
2020-03-26 22:35 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
0 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2020-03-26 22:35 UTC (permalink / raw)
To: intel-gfx
In what seems remarkably similar to the w/a required to not reload an
idle context with HEAD==TAIL, it appears we must prevent the HW from
switching to an idle context in ELSP[1], while simultaneously trying to
preempt the HW to run another context and a continuation of the idle
context (which is no longer idle).
process_csb: vecs0: cs-irq head=0, tail=1
process_csb: vecs0: csb[1]: status=0x00000882:0x00000020
trace_ports: vecs0: preempted { 8c0:30!, 0:0 }
trace_ports: vecs0: promote { 8b2:32!, 8c0:30 }
trace_ports: vecs0: submit { 8b8:32, 8c0:32 }
process_csb: vecs0: cs-irq head=1, tail=2
process_csb: vecs0: csb[2]: status=0x00000814:0x00000040
trace_ports: vecs0: completed { 8b2:32!, 8c0:30 }
process_csb: vecs0: cs-irq head=2, tail=5
process_csb: vecs0: csb[3]: status=0x00000812:0x00000020
trace_ports: vecs0: preempted { 8c0:30!, 0:0 }
trace_ports: vecs0: promote { 8b8:32!, 8c0:32 }
process_csb: vecs0: csb[4]: status=0x00000814:0x00000060
trace_ports: vecs0: completed { 8b8:32!, 8c0:32 }
process_csb: vecs0: csb[5]: status=0x00000818:0x00000020
trace_ports: vecs0: completed { 8c0:32, 0:0 }
process_csb: vecs0: ring:{start:0x00021000, head:03f8, tail:03f8, ctl:00000000, mode:00000200}
process_csb: vecs0: rq:{start:00021000, head:03c0, tail:0400, seqno:8c0:32, hwsp:30},
process_csb: vecs0: ctx:{start:00021000, head:03f8, tail:03f8},
process_csb: GEM_BUG_ON("context completed before request")
Fortunately, we just so happen to have a semaphore in place to prevent
the ring HEAD from proceeding past the end of a request that we can use
to fix the HEAD in position as we reprogram ELSP.
Closes: https://gitlab.freedesktop.org/drm/intel/issues/1501
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 68 ++++++++++++++---------------
1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b12355048501..132816235d8a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1854,7 +1854,7 @@ static inline void clear_ports(struct i915_request **ports, int count)
memset_p((void **)ports, NULL, count);
}
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request **port = execlists->pending;
@@ -1928,13 +1928,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
execlists->queue_priority_hint);
record_preemption(execlists);
- /*
- * Don't let the RING_HEAD advance past the breadcrumb
- * as we unwind (and until we resubmit) so that we do
- * not accidentally tell it to go backwards.
- */
- ring_set_paused(engine, 1);
-
/*
* Note that we have not stopped the GPU at this point,
* so we are unwinding the incomplete requests as they
@@ -1954,7 +1947,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
last->sched.attr.priority,
execlists->queue_priority_hint);
- ring_set_paused(engine, 1);
defer_active(engine);
/*
@@ -1988,7 +1980,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* of timeslices, our queue might be.
*/
start_timeslice(engine);
- return;
+ return false;
}
}
}
@@ -2021,9 +2013,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
if (last && !can_merge_rq(last, rq)) {
+ /* leave this for another sibling */
spin_unlock(&ve->base.active.lock);
start_timeslice(engine);
- return; /* leave this for another sibling */
+ return false;
}
ENGINE_TRACE(engine,
@@ -2193,32 +2186,30 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* interrupt for secondary ports).
*/
execlists->queue_priority_hint = queue_prio(execlists);
+ if (!submit)
+ return false;
- if (submit) {
- *port = execlists_schedule_in(last, port - execlists->pending);
- execlists->switch_priority_hint =
- switch_prio(engine, *execlists->pending);
-
- /*
- * Skip if we ended up with exactly the same set of requests,
- * e.g. trying to timeslice a pair of ordered contexts
- */
- if (!memcmp(active, execlists->pending,
- (port - execlists->pending + 1) * sizeof(*port))) {
- do
- execlists_schedule_out(fetch_and_zero(port));
- while (port-- != execlists->pending);
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ execlists->switch_priority_hint =
+ switch_prio(engine, *execlists->pending);
- goto skip_submit;
- }
- clear_ports(port + 1, last_port - port);
+ /*
+ * Skip if we ended up with exactly the same set of requests,
+ * e.g. trying to timeslice a pair of ordered contexts
+ */
+ if (!memcmp(active, execlists->pending,
+ (port - execlists->pending + 1) * sizeof(*port))) {
+ do
+ execlists_schedule_out(fetch_and_zero(port));
+ while (port-- != execlists->pending);
- execlists_submit_ports(engine);
- set_preempt_timeout(engine, *active);
- } else {
-skip_submit:
- ring_set_paused(engine, 0);
+ return false;
}
+ clear_ports(port + 1, last_port - port);
+
+ execlists_submit_ports(engine);
+ set_preempt_timeout(engine, *active);
+ return true;
}
static void
@@ -2478,7 +2469,16 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
lockdep_assert_held(&engine->active.lock);
if (!READ_ONCE(engine->execlists.pending[0])) {
rcu_read_lock(); /* protect peeking at execlists->active */
- execlists_dequeue(engine);
+
+ /*
+ * Don't let the RING_HEAD advance past the breadcrumb
+ * as we unwind (and until we resubmit) so that we do
+ * not accidentally tell it to go backwards.
+ */
+ ring_set_paused(engine, 1);
+ if (!execlists_dequeue(engine))
+ ring_set_paused(engine, 0);
+
rcu_read_unlock();
}
}
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime
2020-03-26 22:35 [Intel-gfx] [PATCH 1/2] " Chris Wilson
@ 2020-03-26 22:35 ` Chris Wilson
0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2020-03-26 22:35 UTC (permalink / raw)
To: intel-gfx
Upon a GPU reset, we copy the default context image over the top of the
guilty image. This will roll back the CTX_TIMESTAMP register to before
our value of ce->runtime.last. Reset both back to 0 so that we do not
encounter an underflow on the next schedule out after resume.
This should not be a huge issue in practice, as hangs should be rare in
correct code.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 132816235d8a..987dbdf2542e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -238,6 +238,17 @@ __execlists_update_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 head);
+static u32 intel_context_get_runtime(const struct intel_context *ce)
+{
+ /*
+ * We can use either ppHWSP[16] which is recorded before the context
+ * switch (and so excludes the cost of context switches) or use the
+ * value from the context image itself, which is saved/restored earlier
+ * and so includes the cost of the save.
+ */
+ return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
@@ -1154,6 +1165,7 @@ static void restore_default_state(struct intel_context *ce,
engine->context_size - PAGE_SIZE);
execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+ ce->runtime.last = intel_context_get_runtime(ce);
}
static void reset_active(struct i915_request *rq,
@@ -1195,17 +1207,6 @@ static void reset_active(struct i915_request *rq,
ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
-static u32 intel_context_get_runtime(const struct intel_context *ce)
-{
- /*
- * We can use either ppHWSP[16] which is recorded before the context
- * switch (and so excludes the cost of context switches) or use the
- * value from the context image itself, which is saved/restored earlier
- * and so includes the cost of the save.
- */
- return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
-
static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -4581,6 +4582,7 @@ static void init_common_reg_state(u32 * const regs,
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_TIMESTAMP] = 0;
}
static void init_wa_bb_reg_state(u32 * const regs,
--
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2020-03-27 16:01 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-26 23:18 [Intel-gfx] [PATCH 1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Chris Wilson
2020-03-26 23:18 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
2020-03-27 16:01 ` Tvrtko Ursulin
2020-03-27 0:15 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915/execlists: Prevent GPU death on ELSP[1] promotion to idle context Patchwork
2020-03-27 0:52 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-03-27 13:59 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
-- strict thread matches above, loose matches on Subject: below --
2020-03-26 22:35 [Intel-gfx] [PATCH 1/2] " Chris Wilson
2020-03-26 22:35 ` [Intel-gfx] [PATCH 2/2] drm/i915/execlists: Explicitly reset both reg and context runtime Chris Wilson
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.