All of lore.kernel.org
 help / color / mirror / Atom feed
* [CI 1/2] drm/i915/selftests: Flush old resets between engines
@ 2018-02-05 14:34 Chris Wilson
  2018-02-05 14:34 ` [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
  2018-02-05 14:54 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines Patchwork
  0 siblings, 2 replies; 3+ messages in thread
From: Chris Wilson @ 2018-02-05 14:34 UTC (permalink / raw)
  To: intel-gfx

When injecting rapid resets, we must be careful to at least wait for the
previous reset to have taken effect and the engine restarted. If we
perform a second reset before that has happened, we will notice that the
engine hasn't recovered and declare it lost, wedging the device and
failing. In practice, since we wait for each hanging batch to start
before injecting the reset, this too-fast-reset condition can only be
triggered when moving onto the next engine in the test, so we need only
wait for the existing reset to complete before switching engines.

v2: Wrap up the wait inside a safety net to bail out in case of angry hw.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 66 ++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index d1f91a533afa..a42c539c1efe 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -244,6 +244,58 @@ static u32 hws_seqno(const struct hang *h,
 	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
 }
 
+struct wedge_me {
+	struct delayed_work work;
+	struct drm_i915_private *i915;
+	const void *symbol;
+};
+
+static void wedge_me(struct work_struct *work)
+{
+	struct wedge_me *w = container_of(work, typeof(*w), work.work);
+
+	pr_err("%pS timed out, cancelling all further testing.\n",
+	       w->symbol);
+	i915_gem_set_wedged(w->i915);
+}
+
+static void __init_wedge(struct wedge_me *w,
+			 struct drm_i915_private *i915,
+			 long timeout,
+			 const void *symbol)
+{
+	w->i915 = i915;
+	w->symbol = symbol;
+
+	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
+	schedule_delayed_work(&w->work, timeout);
+}
+
+static void __fini_wedge(struct wedge_me *w)
+{
+	cancel_delayed_work_sync(&w->work);
+	destroy_delayed_work_on_stack(&w->work);
+	w->i915 = NULL;
+}
+
+#define wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
+	     (W)->i915;							\
+	     __fini_wedge((W)))
+
+static noinline int
+flush_test(struct drm_i915_private *i915, unsigned int flags)
+{
+	struct wedge_me w;
+
+	cond_resched();
+
+	wedge_on_timeout(&w, i915, HZ)
+		i915_gem_wait_for_idle(i915, flags);
+
+	return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
+}
+
 static void hang_fini(struct hang *h)
 {
 	*h->batch = MI_BATCH_BUFFER_END;
@@ -255,7 +307,7 @@ static void hang_fini(struct hang *h)
 	i915_gem_object_unpin_map(h->hws);
 	i915_gem_object_put(h->hws);
 
-	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
+	flush_test(h->i915, I915_WAIT_LOCKED);
 }
 
 static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
@@ -487,7 +539,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 		if (err)
 			break;
 
-		cond_resched();
+		err = flush_test(i915, 0);
+		if (err)
+			break;
 	}
 
 	if (i915_terminally_wedged(&i915->gpu_error))
@@ -726,7 +780,9 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
 		if (err)
 			break;
 
-		cond_resched();
+		err = flush_test(i915, 0);
+		if (err)
+			break;
 	}
 
 	if (i915_terminally_wedged(&i915->gpu_error))
@@ -952,6 +1008,10 @@ static int igt_reset_queue(void *arg)
 		i915_gem_chipset_flush(i915);
 
 		i915_gem_request_put(prev);
+
+		err = flush_test(i915, I915_WAIT_LOCKED);
+		if (err)
+			break;
 	}
 
 fini:
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing
  2018-02-05 14:34 [CI 1/2] drm/i915/selftests: Flush old resets between engines Chris Wilson
@ 2018-02-05 14:34 ` Chris Wilson
  2018-02-05 14:54 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines Patchwork
  1 sibling, 0 replies; 3+ messages in thread
From: Chris Wilson @ 2018-02-05 14:34 UTC (permalink / raw)
  To: intel-gfx

Avoid injecting hangs in to the i915->kernel_context in case the GPU
reset leaves corruption in the context image in its wake (leading to
continual failures and system hangs after the selftests are ostensibly
complete). Use a sacrificial kernel_context instead.

v2: Closing a context is tricky; export a function (for selftests) from
i915_gem_context.c to get it right.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 39 +++++++++++++-----------
 drivers/gpu/drm/i915/selftests/mock_context.c    | 11 +++++++
 drivers/gpu/drm/i915/selftests/mock_context.h    |  3 ++
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index a42c539c1efe..d1d2c2456f69 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -33,6 +33,7 @@ struct hang {
 	struct drm_i915_private *i915;
 	struct drm_i915_gem_object *hws;
 	struct drm_i915_gem_object *obj;
+	struct i915_gem_context *ctx;
 	u32 *seqno;
 	u32 *batch;
 };
@@ -45,9 +46,15 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915)
 	memset(h, 0, sizeof(*h));
 	h->i915 = i915;
 
+	h->ctx = kernel_context(i915);
+	if (IS_ERR(h->ctx))
+		return PTR_ERR(h->ctx);
+
 	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(h->hws))
-		return PTR_ERR(h->hws);
+	if (IS_ERR(h->hws)) {
+		err = PTR_ERR(h->hws);
+		goto err_ctx;
+	}
 
 	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 	if (IS_ERR(h->obj)) {
@@ -79,6 +86,8 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915)
 	i915_gem_object_put(h->obj);
 err_hws:
 	i915_gem_object_put(h->hws);
+err_ctx:
+	kernel_context_close(h->ctx);
 	return err;
 }
 
@@ -196,9 +205,7 @@ static int emit_recurse_batch(struct hang *h,
 }
 
 static struct drm_i915_gem_request *
-hang_create_request(struct hang *h,
-		    struct intel_engine_cs *engine,
-		    struct i915_gem_context *ctx)
+hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *rq;
 	int err;
@@ -225,7 +232,7 @@ hang_create_request(struct hang *h,
 		h->batch = vaddr;
 	}
 
-	rq = i915_gem_request_alloc(engine, ctx);
+	rq = i915_gem_request_alloc(engine, h->ctx);
 	if (IS_ERR(rq))
 		return rq;
 
@@ -307,6 +314,8 @@ static void hang_fini(struct hang *h)
 	i915_gem_object_unpin_map(h->hws);
 	i915_gem_object_put(h->hws);
 
+	kernel_context_close(h->ctx);
+
 	flush_test(h->i915, I915_WAIT_LOCKED);
 }
 
@@ -342,7 +351,7 @@ static int igt_hang_sanitycheck(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		rq = hang_create_request(&h, engine, i915->kernel_context);
+		rq = hang_create_request(&h, engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			pr_err("Failed to create request for %s, err=%d\n",
@@ -479,8 +488,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 				struct drm_i915_gem_request *rq;
 
 				mutex_lock(&i915->drm.struct_mutex);
-				rq = hang_create_request(&h, engine,
-							 i915->kernel_context);
+				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					mutex_unlock(&i915->drm.struct_mutex);
@@ -687,8 +695,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
 				struct drm_i915_gem_request *rq;
 
 				mutex_lock(&i915->drm.struct_mutex);
-				rq = hang_create_request(&h, engine,
-							 i915->kernel_context);
+				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					mutex_unlock(&i915->drm.struct_mutex);
@@ -843,7 +850,7 @@ static int igt_wait_reset(void *arg)
 	if (err)
 		goto unlock;
 
-	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
+	rq = hang_create_request(&h, i915->engine[RCS]);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto fini;
@@ -922,7 +929,7 @@ static int igt_reset_queue(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		prev = hang_create_request(&h, engine, i915->kernel_context);
+		prev = hang_create_request(&h, engine);
 		if (IS_ERR(prev)) {
 			err = PTR_ERR(prev);
 			goto fini;
@@ -936,9 +943,7 @@ static int igt_reset_queue(void *arg)
 			struct drm_i915_gem_request *rq;
 			unsigned int reset_count;
 
-			rq = hang_create_request(&h,
-						 engine,
-						 i915->kernel_context);
+			rq = hang_create_request(&h, engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
 				goto fini;
@@ -1049,7 +1054,7 @@ static int igt_handle_error(void *arg)
 	if (err)
 		goto err_unlock;
 
-	rq = hang_create_request(&h, engine, i915->kernel_context);
+	rq = hang_create_request(&h, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_fini;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index bbf80d42e793..501becc47c0c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -92,3 +92,14 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 
 	return i915_gem_create_context(i915, file->driver_priv);
 }
+
+struct i915_gem_context *
+kernel_context(struct drm_i915_private *i915)
+{
+	return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+}
+
+void kernel_context_close(struct i915_gem_context *ctx)
+{
+	context_close(ctx);
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
index 2f432c03d413..29b9d60a158b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ b/drivers/gpu/drm/i915/selftests/mock_context.h
@@ -36,4 +36,7 @@ void mock_context_close(struct i915_gem_context *ctx);
 struct i915_gem_context *
 live_context(struct drm_i915_private *i915, struct drm_file *file);
 
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
 #endif /* !__MOCK_CONTEXT_H */
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines
  2018-02-05 14:34 [CI 1/2] drm/i915/selftests: Flush old resets between engines Chris Wilson
  2018-02-05 14:34 ` [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
@ 2018-02-05 14:54 ` Patchwork
  1 sibling, 0 replies; 3+ messages in thread
From: Patchwork @ 2018-02-05 14:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines
URL   : https://patchwork.freedesktop.org/series/37661/
State : warning

== Summary ==

Series 37661v1 series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines
https://patchwork.freedesktop.org/api/1.0/series/37661/revisions/1/mbox/

Test gem_sync:
        Subgroup basic-all:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-each:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-many-each:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-store-all:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-store-each:
                pass       -> SKIP       (fi-blb-e6850)
Test gem_tiled_blits:
        Subgroup basic:
                pass       -> SKIP       (fi-blb-e6850)
Test gem_tiled_fence_blits:
        Subgroup basic:
                pass       -> SKIP       (fi-blb-e6850)
Test gem_wait:
        Subgroup basic-busy-all:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-wait-all:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-await-all:
                pass       -> SKIP       (fi-blb-e6850)
Test kms_busy:
        Subgroup basic-flip-a:
                pass       -> SKIP       (fi-blb-e6850)
        Subgroup basic-flip-b:
                pass       -> SKIP       (fi-blb-e6850)
Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-legacy:
                pass       -> SKIP       (fi-blb-e6850)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713
                dmesg-warn -> PASS       (fi-cnl-y3) fdo#104951

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#104951 https://bugs.freedesktop.org/show_bug.cgi?id=104951

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:416s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:422s
fi-blb-e6850     total:288  pass:210  dwarn:1   dfail:0   fail:0   skip:77  time:344s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:483s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:281s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:486s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:464s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:449s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:564s
fi-cnl-y3        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:576s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:412s
fi-gdg-551       total:288  pass:180  dwarn:0   dfail:0   fail:0   skip:108 time:280s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:508s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:391s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:397s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:409s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:458s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:417s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:457s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:492s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:452s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:501s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:585s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:427s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:503s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:523s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:485s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:481s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:413s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:431s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:525s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:392s
Blacklisted hosts:
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:466s
fi-bxt-dsi failed to collect. IGT log at Patchwork_7887/fi-bxt-dsi/igt.log

ff0257f786f370890f8fa940b2cd7dce0064be72 drm-tip: 2018y-02m-05d-13h-26m-33s UTC integration manifest
b3cd80d7ddf7 drm/i915/selftests: Use a sacrificial context for hang testing
9058fc4f0377 drm/i915/selftests: Flush old resets between engines

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7887/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-02-05 14:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-05 14:34 [CI 1/2] drm/i915/selftests: Flush old resets between engines Chris Wilson
2018-02-05 14:34 ` [CI 2/2] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
2018-02-05 14:54 ` ✗ Fi.CI.BAT: warning for series starting with [CI,1/2] drm/i915/selftests: Flush old resets between engines Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.