* [CI 1/2] drm/i915/selftests: Flush old resets between engines
@ 2018-02-05 14:34 Chris Wilson
2018-02-05 14:34 ` [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
2018-02-05 14:54 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines Patchwork
0 siblings, 2 replies; 3+ messages in thread
From: Chris Wilson @ 2018-02-05 14:34 UTC (permalink / raw)
To: intel-gfx
When injecting rapid resets, we must be careful to at least wait for the
previous reset to have taken effect and the engine restarted. If we
perform a second reset before that has happened, we will notice that the
engine hasn't recovered and declare it lost, wedging the device and
failing. In practice, since we wait for each hanging batch to start
before injecting the reset, this too-fast-reset condition can only be
triggered when moving onto the next engine in the test, so we need only
wait for the existing reset to complete before switching engines.
v2: Wrap up the wait inside a safety net to bail out in case of angry hw.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 66 ++++++++++++++++++++++--
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index d1f91a533afa..a42c539c1efe 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -244,6 +244,58 @@ static u32 hws_seqno(const struct hang *h,
return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}
+struct wedge_me {
+ struct delayed_work work;
+ struct drm_i915_private *i915;
+ const void *symbol;
+};
+
+static void wedge_me(struct work_struct *work)
+{
+ struct wedge_me *w = container_of(work, typeof(*w), work.work);
+
+ pr_err("%pS timed out, cancelling all further testing.\n",
+ w->symbol);
+ i915_gem_set_wedged(w->i915);
+}
+
+static void __init_wedge(struct wedge_me *w,
+ struct drm_i915_private *i915,
+ long timeout,
+ const void *symbol)
+{
+ w->i915 = i915;
+ w->symbol = symbol;
+
+ INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
+ schedule_delayed_work(&w->work, timeout);
+}
+
+static void __fini_wedge(struct wedge_me *w)
+{
+ cancel_delayed_work_sync(&w->work);
+ destroy_delayed_work_on_stack(&w->work);
+ w->i915 = NULL;
+}
+
+#define wedge_on_timeout(W, DEV, TIMEOUT) \
+ for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
+ (W)->i915; \
+ __fini_wedge((W)))
+
+static noinline int
+flush_test(struct drm_i915_private *i915, unsigned int flags)
+{
+ struct wedge_me w;
+
+ cond_resched();
+
+ wedge_on_timeout(&w, i915, HZ)
+ i915_gem_wait_for_idle(i915, flags);
+
+ return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
+}
+
static void hang_fini(struct hang *h)
{
*h->batch = MI_BATCH_BUFFER_END;
@@ -255,7 +307,7 @@ static void hang_fini(struct hang *h)
i915_gem_object_unpin_map(h->hws);
i915_gem_object_put(h->hws);
- i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
+ flush_test(h->i915, I915_WAIT_LOCKED);
}
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
@@ -487,7 +539,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
if (err)
break;
- cond_resched();
+ err = flush_test(i915, 0);
+ if (err)
+ break;
}
if (i915_terminally_wedged(&i915->gpu_error))
@@ -726,7 +780,9 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
if (err)
break;
- cond_resched();
+ err = flush_test(i915, 0);
+ if (err)
+ break;
}
if (i915_terminally_wedged(&i915->gpu_error))
@@ -952,6 +1008,10 @@ static int igt_reset_queue(void *arg)
i915_gem_chipset_flush(i915);
i915_gem_request_put(prev);
+
+ err = flush_test(i915, I915_WAIT_LOCKED);
+ if (err)
+ break;
}
fini:
--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing
2018-02-05 14:34 [CI 1/2] drm/i915/selftests: Flush old resets between engines Chris Wilson
@ 2018-02-05 14:34 ` Chris Wilson
2018-02-05 14:54 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines Patchwork
1 sibling, 0 replies; 3+ messages in thread
From: Chris Wilson @ 2018-02-05 14:34 UTC (permalink / raw)
To: intel-gfx
Avoid injecting hangs into the i915->kernel_context in case the GPU
reset leaves corruption in the context image in its wake (leading to
continual failures and system hangs after the selftests are ostensibly
complete). Use a sacrificial kernel_context instead.
v2: Closing a context is tricky; export a function (for selftests) from
i915_gem_context.c to get it right.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 39 +++++++++++++-----------
drivers/gpu/drm/i915/selftests/mock_context.c | 11 +++++++
drivers/gpu/drm/i915/selftests/mock_context.h | 3 ++
3 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index a42c539c1efe..d1d2c2456f69 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -33,6 +33,7 @@ struct hang {
struct drm_i915_private *i915;
struct drm_i915_gem_object *hws;
struct drm_i915_gem_object *obj;
+ struct i915_gem_context *ctx;
u32 *seqno;
u32 *batch;
};
@@ -45,9 +46,15 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915)
memset(h, 0, sizeof(*h));
h->i915 = i915;
+ h->ctx = kernel_context(i915);
+ if (IS_ERR(h->ctx))
+ return PTR_ERR(h->ctx);
+
h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(h->hws))
- return PTR_ERR(h->hws);
+ if (IS_ERR(h->hws)) {
+ err = PTR_ERR(h->hws);
+ goto err_ctx;
+ }
h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(h->obj)) {
@@ -79,6 +86,8 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915)
i915_gem_object_put(h->obj);
err_hws:
i915_gem_object_put(h->hws);
+err_ctx:
+ kernel_context_close(h->ctx);
return err;
}
@@ -196,9 +205,7 @@ static int emit_recurse_batch(struct hang *h,
}
static struct drm_i915_gem_request *
-hang_create_request(struct hang *h,
- struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
+hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *rq;
int err;
@@ -225,7 +232,7 @@ hang_create_request(struct hang *h,
h->batch = vaddr;
}
- rq = i915_gem_request_alloc(engine, ctx);
+ rq = i915_gem_request_alloc(engine, h->ctx);
if (IS_ERR(rq))
return rq;
@@ -307,6 +314,8 @@ static void hang_fini(struct hang *h)
i915_gem_object_unpin_map(h->hws);
i915_gem_object_put(h->hws);
+ kernel_context_close(h->ctx);
+
flush_test(h->i915, I915_WAIT_LOCKED);
}
@@ -342,7 +351,7 @@ static int igt_hang_sanitycheck(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- rq = hang_create_request(&h, engine, i915->kernel_context);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
pr_err("Failed to create request for %s, err=%d\n",
@@ -479,8 +488,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
struct drm_i915_gem_request *rq;
mutex_lock(&i915->drm.struct_mutex);
- rq = hang_create_request(&h, engine,
- i915->kernel_context);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
mutex_unlock(&i915->drm.struct_mutex);
@@ -687,8 +695,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
struct drm_i915_gem_request *rq;
mutex_lock(&i915->drm.struct_mutex);
- rq = hang_create_request(&h, engine,
- i915->kernel_context);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
mutex_unlock(&i915->drm.struct_mutex);
@@ -843,7 +850,7 @@ static int igt_wait_reset(void *arg)
if (err)
goto unlock;
- rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
+ rq = hang_create_request(&h, i915->engine[RCS]);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto fini;
@@ -922,7 +929,7 @@ static int igt_reset_queue(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
- prev = hang_create_request(&h, engine, i915->kernel_context);
+ prev = hang_create_request(&h, engine);
if (IS_ERR(prev)) {
err = PTR_ERR(prev);
goto fini;
@@ -936,9 +943,7 @@ static int igt_reset_queue(void *arg)
struct drm_i915_gem_request *rq;
unsigned int reset_count;
- rq = hang_create_request(&h,
- engine,
- i915->kernel_context);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto fini;
@@ -1049,7 +1054,7 @@ static int igt_handle_error(void *arg)
if (err)
goto err_unlock;
- rq = hang_create_request(&h, engine, i915->kernel_context);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_fini;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index bbf80d42e793..501becc47c0c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -92,3 +92,14 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
return i915_gem_create_context(i915, file->driver_priv);
}
+
+struct i915_gem_context *
+kernel_context(struct drm_i915_private *i915)
+{
+ return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+}
+
+void kernel_context_close(struct i915_gem_context *ctx)
+{
+ context_close(ctx);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
index 2f432c03d413..29b9d60a158b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/selftests/mock_context.h
@@ -36,4 +36,7 @@ void mock_context_close(struct i915_gem_context *ctx);
struct i915_gem_context *
live_context(struct drm_i915_private *i915, struct drm_file *file);
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
#endif /* !__MOCK_CONTEXT_H */
--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines
2018-02-05 14:34 [CI 1/2] drm/i915/selftests: Flush old resets between engines Chris Wilson
2018-02-05 14:34 ` [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
@ 2018-02-05 14:54 ` Patchwork
1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2018-02-05 14:54 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines
URL : https://patchwork.freedesktop.org/series/37661/
State : warning
== Summary ==
Series 37661v1 series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines
https://patchwork.freedesktop.org/api/1.0/series/37661/revisions/1/mbox/
Test gem_sync:
Subgroup basic-all:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-each:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-many-each:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-store-all:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-store-each:
pass -> SKIP (fi-blb-e6850)
Test gem_tiled_blits:
Subgroup basic:
pass -> SKIP (fi-blb-e6850)
Test gem_tiled_fence_blits:
Subgroup basic:
pass -> SKIP (fi-blb-e6850)
Test gem_wait:
Subgroup basic-busy-all:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-wait-all:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-await-all:
pass -> SKIP (fi-blb-e6850)
Test kms_busy:
Subgroup basic-flip-a:
pass -> SKIP (fi-blb-e6850)
Subgroup basic-flip-b:
pass -> SKIP (fi-blb-e6850)
Test kms_cursor_legacy:
Subgroup basic-busy-flip-before-cursor-legacy:
pass -> SKIP (fi-blb-e6850)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
incomplete -> PASS (fi-snb-2520m) fdo#103713
dmesg-warn -> PASS (fi-cnl-y3) fdo#104951
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:416s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:422s
fi-blb-e6850 total:288 pass:210 dwarn:1 dfail:0 fail:0 skip:77 time:344s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:483s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:281s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:486s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:464s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:449s
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:564s
fi-cnl-y3 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:576s
fi-elk-e7500 total:288 pass:229 dwarn:0 dfail:0 fail:0 skip:59 time:412s
fi-gdg-551 total:288 pass:180 dwarn:0 dfail:0 fail:0 skip:108 time:280s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:508s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:391s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:397s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:409s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:458s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:417s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:457s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:492s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:452s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:501s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:585s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:427s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:503s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:523s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:485s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:481s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:413s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:431s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:525s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:392s
Blacklisted hosts:
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:466s
fi-bxt-dsi failed to collect. IGT log at Patchwork_7887/fi-bxt-dsi/igt.log
ff0257f786f370890f8fa940b2cd7dce0064be72 drm-tip: 2018y-02m-05d-13h-26m-33s UTC integration manifest
b3cd80d7ddf7 drm/i915/selftests: Use a sacrificial context for hang testing
9058fc4f0377 drm/i915/selftests: Flush old resets between engines
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7887/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-02-05 14:54 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-05 14:34 [CI 1/2] drm/i915/selftests: Flush old resets between engines Chris Wilson
2018-02-05 14:34 ` [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
2018-02-05 14:54 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.