From: Matthew Brost <matthew.brost@intel.com>
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 44/51] drm/i915/selftest: Better error reporting from hangcheck selftest
Date: Fri, 16 Jul 2021 13:13:00 -0700 [thread overview]
Message-ID: <20210716201258.GA27739@sdutt-i7> (raw)
In-Reply-To: <20210716201724.54804-45-matthew.brost@intel.com>
On Fri, Jul 16, 2021 at 01:17:17PM -0700, Matthew Brost wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> There are many ways in which the hangcheck selftest can fail. Very few
> of them actually printed an error message to say what happened. So,
> fill in the missing messages.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 89 ++++++++++++++++----
> 1 file changed, 72 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> index 7aea10aa1fb4..0ed87cc4d063 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
> @@ -378,6 +378,7 @@ static int igt_reset_nop(void *arg)
> ce = intel_context_create(engine);
> if (IS_ERR(ce)) {
> err = PTR_ERR(ce);
> + pr_err("[%s] Create context failed: %d!\n", engine->name, err);
> break;
> }
>
> @@ -387,6 +388,7 @@ static int igt_reset_nop(void *arg)
> rq = intel_context_create_request(ce);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create request failed: %d!\n", engine->name, err);
> break;
> }
>
> @@ -401,24 +403,31 @@ static int igt_reset_nop(void *arg)
> igt_global_reset_unlock(gt);
>
> if (intel_gt_is_wedged(gt)) {
> + pr_err("[%s] GT is wedged!\n", engine->name);
> err = -EIO;
> break;
> }
>
> if (i915_reset_count(global) != reset_count + ++count) {
> - pr_err("Full GPU reset not recorded!\n");
> + pr_err("[%s] Reset not recorded: %d vs %d + %d!\n",
> + engine->name, i915_reset_count(global), reset_count, count);
> err = -EINVAL;
> break;
> }
>
> err = igt_flush_test(gt->i915);
> - if (err)
> + if (err) {
> + pr_err("[%s] Flush failed: %d!\n", engine->name, err);
> break;
> + }
> } while (time_before(jiffies, end_time));
> pr_info("%s: %d resets\n", __func__, count);
>
> - if (igt_flush_test(gt->i915))
> + if (igt_flush_test(gt->i915)) {
> + pr_err("Post flush failed: %d!\n", err);
> err = -EIO;
> + }
> +
> return err;
> }
>
> @@ -441,8 +450,10 @@ static int igt_reset_nop_engine(void *arg)
> int err;
>
> ce = intel_context_create(engine);
> - if (IS_ERR(ce))
> + if (IS_ERR(ce)) {
> + pr_err("[%s] Create context failed: %d!\n", engine->name, err);
> return PTR_ERR(ce);
> + }
>
> reset_count = i915_reset_count(global);
> reset_engine_count = i915_reset_engine_count(global, engine);
> @@ -550,8 +561,10 @@ static int igt_reset_fail_engine(void *arg)
> int err;
>
> ce = intel_context_create(engine);
> - if (IS_ERR(ce))
> + if (IS_ERR(ce)) {
> + pr_err("[%s] Create context failed: %d!\n", engine->name, err);
> return PTR_ERR(ce);
> + }
>
> st_engine_heartbeat_disable(engine);
> set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
> @@ -711,6 +724,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> break;
> }
>
> @@ -765,12 +779,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
> break;
>
> err = igt_flush_test(gt->i915);
> - if (err)
> + if (err) {
> + pr_err("[%s] Flush failed: %d!\n", engine->name, err);
> break;
> + }
> }
>
> - if (intel_gt_is_wedged(gt))
> + if (intel_gt_is_wedged(gt)) {
> + pr_err("GT is wedged!\n");
> err = -EIO;
> + }
>
> if (active)
> hang_fini(&h);
> @@ -837,6 +855,7 @@ static int active_engine(void *data)
> ce[count] = intel_context_create(engine);
> if (IS_ERR(ce[count])) {
> err = PTR_ERR(ce[count]);
> + pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
> while (--count)
> intel_context_put(ce[count]);
> return err;
> @@ -852,6 +871,7 @@ static int active_engine(void *data)
> new = intel_context_create_request(ce[idx]);
> if (IS_ERR(new)) {
> err = PTR_ERR(new);
> + pr_err("[%s] Create request #%d failed: %d!\n", engine->name, idx, err);
> break;
> }
>
> @@ -867,8 +887,10 @@ static int active_engine(void *data)
> }
>
> err = active_request_put(old);
> - if (err)
> + if (err) {
> + pr_err("[%s] Request put failed: %d!\n", engine->name, err);
> break;
> + }
>
> cond_resched();
> }
> @@ -876,6 +898,9 @@ static int active_engine(void *data)
> for (count = 0; count < ARRAY_SIZE(rq); count++) {
> int err__ = active_request_put(rq[count]);
>
> + if (err)
> + pr_err("[%s] Request put #%ld failed: %d!\n", engine->name, count, err);
> +
> /* Keep the first error */
> if (!err)
> err = err__;
> @@ -949,6 +974,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
> "igt/%s", other->name);
> if (IS_ERR(tsk)) {
> err = PTR_ERR(tsk);
> + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
> goto unwind;
> }
>
> @@ -967,6 +993,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> break;
> }
>
> @@ -999,10 +1026,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
> if (rq) {
> if (rq->fence.error != -EIO) {
> pr_err("i915_reset_engine(%s:%s):"
> - " failed to reset request %llx:%lld\n",
> + " failed to reset request %lld:%lld [0x%04X]\n",
> engine->name, test_name,
> rq->fence.context,
> - rq->fence.seqno);
> + rq->fence.seqno, rq->context->guc_id);
> i915_request_put(rq);
>
> GEM_TRACE_DUMP();
> @@ -1101,8 +1128,10 @@ static int __igt_reset_engines(struct intel_gt *gt,
> break;
>
> err = igt_flush_test(gt->i915);
> - if (err)
> + if (err) {
> + pr_err("[%s] Flush failed: %d!\n", engine->name, err);
> break;
> + }
> }
>
> if (intel_gt_is_wedged(gt))
> @@ -1180,12 +1209,15 @@ static int igt_reset_wait(void *arg)
> igt_global_reset_lock(gt);
>
> err = hang_init(&h, gt);
> - if (err)
> + if (err) {
> + pr_err("[%s] Hang init failed: %d!\n", engine->name, err);
> goto unlock;
> + }
>
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> goto fini;
> }
>
> @@ -1310,12 +1342,15 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
> /* Check that we can recover an unbind stuck on a hanging request */
>
> err = hang_init(&h, gt);
> - if (err)
> + if (err) {
> + pr_err("[%s] Hang init failed: %d!\n", engine->name, err);
> return err;
> + }
>
> obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
> if (IS_ERR(obj)) {
> err = PTR_ERR(obj);
> + pr_err("[%s] Create object failed: %d!\n", engine->name, err);
> goto fini;
> }
>
> @@ -1330,12 +1365,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
> arg.vma = i915_vma_instance(obj, vm, NULL);
> if (IS_ERR(arg.vma)) {
> err = PTR_ERR(arg.vma);
> + pr_err("[%s] VMA instance failed: %d!\n", engine->name, err);
> goto out_obj;
> }
>
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> goto out_obj;
> }
>
> @@ -1347,6 +1384,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
> err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
> if (err) {
> i915_request_add(rq);
> + pr_err("[%s] VMA pin failed: %d!\n", engine->name, err);
> goto out_obj;
> }
>
> @@ -1363,8 +1401,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
> i915_vma_lock(arg.vma);
> err = i915_request_await_object(rq, arg.vma->obj,
> flags & EXEC_OBJECT_WRITE);
> - if (err == 0)
> + if (err == 0) {
> err = i915_vma_move_to_active(arg.vma, rq, flags);
> + if (err)
> + pr_err("[%s] Move to active failed: %d!\n", engine->name, err);
> + } else {
> + pr_err("[%s] Request await failed: %d!\n", engine->name, err);
> + }
> +
> i915_vma_unlock(arg.vma);
>
> if (flags & EXEC_OBJECT_NEEDS_FENCE)
> @@ -1392,6 +1436,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
> tsk = kthread_run(fn, &arg, "igt/evict_vma");
> if (IS_ERR(tsk)) {
> err = PTR_ERR(tsk);
> + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
> tsk = NULL;
> goto out_reset;
> }
> @@ -1518,6 +1563,7 @@ static int igt_reset_queue(void *arg)
> prev = hang_create_request(&h, engine);
> if (IS_ERR(prev)) {
> err = PTR_ERR(prev);
> + pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err);
> goto fini;
> }
>
> @@ -1532,6 +1578,7 @@ static int igt_reset_queue(void *arg)
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> goto fini;
> }
>
> @@ -1619,8 +1666,10 @@ static int igt_reset_queue(void *arg)
> i915_request_put(prev);
>
> err = igt_flush_test(gt->i915);
> - if (err)
> + if (err) {
> + pr_err("[%s] Flush failed: %d!\n", engine->name, err);
> break;
> + }
> }
>
> fini:
> @@ -1653,12 +1702,15 @@ static int igt_handle_error(void *arg)
> return 0;
>
> err = hang_init(&h, gt);
> - if (err)
> + if (err) {
> + pr_err("[%s] Hang init failed: %d!\n", engine->name, err);
> return err;
> + }
>
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> goto err_fini;
> }
>
> @@ -1743,12 +1795,15 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
> return err;
>
> err = hang_init(&h, engine->gt);
> - if (err)
> + if (err) {
> + pr_err("[%s] Hang init failed: %d!\n", engine->name, err);
> return err;
> + }
>
> rq = hang_create_request(&h, engine);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err);
> goto out;
> }
>
> --
> 2.28.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2021-07-16 20:24 UTC|newest]
Thread overview: 110+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-16 20:16 [PATCH 00/51] GuC submission support Matthew Brost
2021-07-16 20:16 ` [PATCH 01/51] drm/i915/guc: Add new GuC interface defines and structures Matthew Brost
2021-07-16 20:16 ` [PATCH 02/51] drm/i915/guc: Remove GuC stage descriptor, add LRC descriptor Matthew Brost
2021-07-16 20:16 ` [PATCH 03/51] drm/i915/guc: Add LRC descriptor context lookup array Matthew Brost
2021-07-16 20:16 ` [PATCH 04/51] drm/i915/guc: Implement GuC submission tasklet Matthew Brost
2021-07-19 23:01 ` John Harrison
2021-07-19 22:55 ` Matthew Brost
2021-07-20 0:26 ` John Harrison
2021-07-16 20:16 ` [PATCH 05/51] drm/i915/guc: Add bypass tasklet submission path to GuC Matthew Brost
2021-07-16 20:16 ` [PATCH 06/51] drm/i915/guc: Implement GuC context operations for new inteface Matthew Brost
2021-07-20 0:23 ` John Harrison
2021-07-20 2:45 ` Matthew Brost
2021-07-20 0:51 ` Daniele Ceraolo Spurio
2021-07-20 4:04 ` Matthew Brost
2021-07-21 23:51 ` Daniele Ceraolo Spurio
2021-07-22 7:57 ` [Intel-gfx] " Michal Wajdeczko
2021-07-22 15:48 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 07/51] drm/i915/guc: Insert fence on context when deregistering Matthew Brost
2021-07-16 20:16 ` [PATCH 08/51] drm/i915/guc: Defer context unpin until scheduling is disabled Matthew Brost
2021-07-16 20:16 ` [PATCH 09/51] drm/i915/guc: Disable engine barriers with GuC during unpin Matthew Brost
2021-07-16 20:16 ` [PATCH 10/51] drm/i915/guc: Extend deregistration fence to schedule disable Matthew Brost
2021-07-16 20:16 ` [PATCH 11/51] drm/i915: Disable preempt busywait when using GuC scheduling Matthew Brost
2021-07-16 20:16 ` [PATCH 12/51] drm/i915/guc: Ensure request ordering via completion fences Matthew Brost
2021-07-19 23:46 ` Daniele Ceraolo Spurio
2021-07-20 2:48 ` Matthew Brost
2021-07-20 2:50 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 13/51] drm/i915/guc: Disable semaphores when using GuC scheduling Matthew Brost
2021-07-20 0:33 ` John Harrison
2021-07-16 20:16 ` [PATCH 14/51] drm/i915/guc: Ensure G2H response has space in buffer Matthew Brost
2021-07-16 20:16 ` [PATCH 15/51] drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC Matthew Brost
2021-07-20 1:03 ` John Harrison
2021-07-20 1:53 ` Matthew Brost
2021-07-20 19:49 ` John Harrison
2021-07-16 20:16 ` [PATCH 16/51] drm/i915/guc: Update GuC debugfs to support new GuC Matthew Brost
2021-07-20 1:13 ` John Harrison
2021-07-16 20:16 ` [PATCH 17/51] drm/i915/guc: Add several request trace points Matthew Brost
2021-07-20 1:27 ` John Harrison
2021-07-20 2:10 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 18/51] drm/i915: Add intel_context tracing Matthew Brost
2021-07-16 20:16 ` [PATCH 19/51] drm/i915/guc: GuC virtual engines Matthew Brost
2021-07-19 23:33 ` Daniele Ceraolo Spurio
2021-07-19 23:27 ` Matthew Brost
2021-07-19 23:42 ` Daniele Ceraolo Spurio
2021-07-19 23:32 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 20/51] drm/i915: Track 'serial' counts for " Matthew Brost
2021-07-20 1:28 ` John Harrison
2021-07-20 1:54 ` Matthew Brost
2021-07-20 16:47 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 21/51] drm/i915: Hold reference to intel_context over life of i915_request Matthew Brost
2021-07-16 20:16 ` [PATCH 22/51] drm/i915/guc: Disable bonding extension with GuC submission Matthew Brost
2021-07-16 20:16 ` [PATCH 23/51] drm/i915/guc: Direct all breadcrumbs for a class to single breadcrumbs Matthew Brost
2021-07-20 19:45 ` John Harrison
2021-07-22 12:46 ` [Intel-gfx] " Tvrtko Ursulin
2021-07-26 22:25 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 24/51] drm/i915: Add i915_sched_engine destroy vfunc Matthew Brost
2021-07-20 19:55 ` John Harrison
2021-07-20 19:53 ` Matthew Brost
2021-07-16 20:16 ` [PATCH 25/51] drm/i915: Move active request tracking to a vfunc Matthew Brost
2021-07-20 20:05 ` John Harrison
2021-07-16 20:16 ` [PATCH 26/51] drm/i915/guc: Reset implementation for new GuC interface Matthew Brost
2021-07-20 20:19 ` John Harrison
2021-07-20 20:59 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 27/51] drm/i915: Reset GPU immediately if submission is disabled Matthew Brost
2021-07-16 20:17 ` [PATCH 28/51] drm/i915/guc: Add disable interrupts to guc sanitize Matthew Brost
2021-07-16 20:17 ` [PATCH 29/51] drm/i915/guc: Suspend/resume implementation for new interface Matthew Brost
2021-07-16 20:17 ` [PATCH 30/51] drm/i915/guc: Handle context reset notification Matthew Brost
2021-07-20 20:29 ` John Harrison
2021-07-20 20:38 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 31/51] drm/i915/guc: Handle engine reset failure notification Matthew Brost
2021-07-16 20:17 ` [PATCH 32/51] drm/i915/guc: Enable the timer expired interrupt for GuC Matthew Brost
2021-07-16 20:17 ` [PATCH 33/51] drm/i915/guc: Provide mmio list to be saved/restored on engine reset Matthew Brost
2021-07-22 4:47 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 34/51] drm/i915/guc: Don't complain about reset races Matthew Brost
2021-07-16 20:17 ` [PATCH 35/51] drm/i915/guc: Enable GuC engine reset Matthew Brost
2021-07-16 20:17 ` [PATCH 36/51] drm/i915/guc: Capture error state on context reset Matthew Brost
2021-07-16 20:17 ` [PATCH 37/51] drm/i915/guc: Fix for error capture after full GPU reset with GuC Matthew Brost
2021-07-16 20:17 ` [PATCH 38/51] drm/i915/guc: Hook GuC scheduling policies up Matthew Brost
2021-07-16 20:17 ` [PATCH 39/51] drm/i915/guc: Connect reset modparam updates to GuC policy flags Matthew Brost
2021-07-16 20:04 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 40/51] drm/i915/guc: Include scheduling policies in the debugfs state dump Matthew Brost
2021-07-16 20:17 ` [PATCH 41/51] drm/i915/guc: Add golden context to GuC ADS Matthew Brost
2021-07-19 17:24 ` [Intel-gfx] " Matthew Brost
2021-07-19 18:25 ` John Harrison
2021-07-19 18:30 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 42/51] drm/i915/guc: Implement banned contexts for GuC submission Matthew Brost
2021-07-20 21:41 ` John Harrison
2021-07-16 20:17 ` [PATCH 43/51] drm/i915/guc: Support request cancellation Matthew Brost
2021-07-22 19:56 ` Daniele Ceraolo Spurio
2021-07-22 20:13 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 44/51] drm/i915/selftest: Better error reporting from hangcheck selftest Matthew Brost
2021-07-16 20:13 ` Matthew Brost [this message]
2021-07-16 20:17 ` [PATCH 45/51] drm/i915/selftest: Fix workarounds selftest for GuC submission Matthew Brost
2021-07-20 17:14 ` [Intel-gfx] " Matthew Brost
2021-07-16 20:17 ` [PATCH 46/51] drm/i915/selftest: Fix MOCS " Matthew Brost
2021-07-16 23:57 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 47/51] drm/i915/selftest: Increase some timeouts in live_requests Matthew Brost
2021-07-20 21:46 ` John Harrison
2021-07-22 8:13 ` [Intel-gfx] " Tvrtko Ursulin
2021-07-16 20:17 ` [PATCH 48/51] drm/i915/selftest: Fix hangcheck self test for GuC submission Matthew Brost
2021-07-16 23:43 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 49/51] drm/i915/selftest: Bump selftest timeouts for hangcheck Matthew Brost
2021-07-16 22:23 ` Matthew Brost
2021-07-22 8:17 ` [Intel-gfx] " Tvrtko Ursulin
2021-07-16 20:17 ` [PATCH 50/51] drm/i915/guc: Implement GuC priority management Matthew Brost
2021-07-22 20:26 ` Daniele Ceraolo Spurio
2021-07-22 21:38 ` Matthew Brost
2021-07-22 21:50 ` Daniele Ceraolo Spurio
2021-07-22 21:55 ` Matthew Brost
2021-07-16 20:17 ` [PATCH 51/51] drm/i915/guc: Unblock GuC submission on Gen11+ Matthew Brost
2021-07-19 9:06 ` [Intel-gfx] [PATCH 00/51] GuC submission support Tvrtko Ursulin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210716201258.GA27739@sdutt-i7 \
--to=matthew.brost@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).