All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH i-g-t v2 1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset
@ 2018-03-29 13:05 ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-03-29 13:05 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Contexts executing when reset triggers are potentially corrupt so trying to
use them from a subsequent test (like the default context) can hang the
GPU or even the driver.

Work around that by always creating a dedicated context which will be
running when GPU reset happens.

v2:
 * Export and use gem_reopen_driver so the test works on old gens as well.
   (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 lib/i915/gem_submission.c | 11 +++++--
 lib/i915/gem_submission.h |  2 ++
 tests/gem_eio.c           | 77 ++++++++++++++++++++++++++++++++---------------
 3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/lib/i915/gem_submission.c b/lib/i915/gem_submission.c
index 7d3cbdbf8e77..2fd460d5ed2b 100644
--- a/lib/i915/gem_submission.c
+++ b/lib/i915/gem_submission.c
@@ -165,7 +165,14 @@ bool gem_has_guc_submission(int fd)
 	return gem_submission_method(fd) & GEM_SUBMISSION_GUC;
 }
 
-static int reopen_driver(int fd)
+/**
+ * gem_reopen_driver:
+ * @fd: re-open the i915 drm file descriptor
+ *
+ * Re-opens the drm fd which is useful in instances where a clean default
+ * context is needed.
+ */
+int gem_reopen_driver(int fd)
 {
 	char path[256];
 
@@ -201,7 +208,7 @@ void gem_test_engine(int i915, unsigned int engine)
 		.buffer_count = 1,
 	};
 
-	i915 = reopen_driver(i915);
+	i915 = gem_reopen_driver(i915);
 	igt_assert(!is_wedged(i915));
 
 	obj.handle = gem_create(i915, 4096);
diff --git a/lib/i915/gem_submission.h b/lib/i915/gem_submission.h
index 6b39a0532295..f94eabb201b4 100644
--- a/lib/i915/gem_submission.h
+++ b/lib/i915/gem_submission.h
@@ -35,4 +35,6 @@ bool gem_has_guc_submission(int fd);
 
 void gem_test_engine(int fd, unsigned int engine);
 
+int gem_reopen_driver(int fd);
+
 #endif /* GEM_SUBMISSION_H */
diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b824d9d4c9c0..b7c5047f0816 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -255,6 +255,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 {
 	igt_spin_t *hang;
 
+	fd = gem_reopen_driver(fd);
 	igt_require_gem(fd);
 
 	/*
@@ -276,10 +277,14 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 	igt_require(i915_reset_control(true));
 
 	trigger_reset(fd);
+	close(fd);
 }
 
 static void test_suspend(int fd, int state)
 {
+	fd = gem_reopen_driver(fd);
+	igt_require_gem(fd);
+
 	/* Do a suspend first so that we don't skip inside the test */
 	igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
 
@@ -291,27 +296,32 @@ static void test_suspend(int fd, int state)
 
 	igt_require(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static void test_inflight(int fd, unsigned int wait)
 {
-	const uint32_t bbe = MI_BATCH_BUFFER_END;
-	struct drm_i915_gem_exec_object2 obj[2];
+	int parent_fd = fd;
 	unsigned int engine;
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 
-	memset(obj, 0, sizeof(obj));
-	obj[0].flags = EXEC_OBJECT_WRITE;
-	obj[1].handle = gem_create(fd, 4096);
-	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
-
-	for_each_engine(fd, engine) {
+	for_each_engine(parent_fd, engine) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj[2];
 		struct drm_i915_gem_execbuffer2 execbuf;
 		igt_spin_t *hang;
 		int fence[64]; /* conservative estimate of ring size */
 
+		fd = gem_reopen_driver(parent_fd);
+		igt_require_gem(fd);
+
+		memset(obj, 0, sizeof(obj));
+		obj[0].flags = EXEC_OBJECT_WRITE;
+		obj[1].handle = gem_create(fd, 4096);
+		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
 		gem_quiescent_gpu(fd);
 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
 		igt_require(i915_reset_control(false));
@@ -340,6 +350,9 @@ static void test_inflight(int fd, unsigned int wait)
 		igt_spin_batch_free(fd, hang);
 		igt_assert(i915_reset_control(true));
 		trigger_reset(fd);
+
+		gem_close(fd, obj[1].handle);
+		close(fd);
 	}
 }
 
@@ -351,6 +364,7 @@ static void test_inflight_suspend(int fd)
 	int fence[64]; /* conservative estimate of ring size */
 	igt_spin_t *hang;
 
+	fd = gem_reopen_driver(fd);
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 	igt_require(i915_reset_control(false));
@@ -387,6 +401,7 @@ static void test_inflight_suspend(int fd)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static uint32_t context_create_safe(int i915)
@@ -408,33 +423,37 @@ static uint32_t context_create_safe(int i915)
 
 static void test_inflight_contexts(int fd, unsigned int wait)
 {
-	struct drm_i915_gem_exec_object2 obj[2];
-	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	int parent_fd = fd;
 	unsigned int engine;
-	uint32_t ctx[64];
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 	gem_require_contexts(fd);
 
-	for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
-		ctx[n] = context_create_safe(fd);
-
-	memset(obj, 0, sizeof(obj));
-	obj[0].flags = EXEC_OBJECT_WRITE;
-	obj[1].handle = gem_create(fd, 4096);
-	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
-
-	for_each_engine(fd, engine) {
+	for_each_engine(parent_fd, engine) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj[2];
 		struct drm_i915_gem_execbuffer2 execbuf;
 		igt_spin_t *hang;
+		uint32_t ctx[64];
 		int fence[64];
 
+		fd = gem_reopen_driver(parent_fd);
+		igt_require_gem(fd);
+
+		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+			ctx[n] = context_create_safe(fd);
+
 		gem_quiescent_gpu(fd);
 
 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
 		igt_require(i915_reset_control(false));
 
+		memset(obj, 0, sizeof(obj));
+		obj[0].flags = EXEC_OBJECT_WRITE;
+		obj[1].handle = gem_create(fd, 4096);
+		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
 		hang = spin_sync(fd, 0, engine);
 		obj[0].handle = hang->handle;
 
@@ -458,12 +477,15 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 		}
 
 		igt_spin_batch_free(fd, hang);
+		gem_close(fd, obj[1].handle);
 		igt_assert(i915_reset_control(true));
 		trigger_reset(fd);
-	}
 
-	for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
-		gem_context_destroy(fd, ctx[n]);
+		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+			gem_context_destroy(fd, ctx[n]);
+
+		close(fd);
+	}
 }
 
 static void test_inflight_external(int fd)
@@ -478,6 +500,9 @@ static void test_inflight_external(int fd)
 	igt_require_sw_sync();
 	igt_require(gem_has_exec_fence(fd));
 
+	fd = gem_reopen_driver(fd);
+	igt_require_gem(fd);
+
 	fence = igt_cork_plug(&cork, fd);
 
 	igt_require(i915_reset_control(false));
@@ -514,6 +539,7 @@ static void test_inflight_external(int fd)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static void test_inflight_internal(int fd, unsigned int wait)
@@ -525,9 +551,11 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	int fences[16];
 	igt_spin_t *hang;
 
-	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 
+	fd = gem_reopen_driver(fd);
+	igt_require_gem(fd);
+
 	igt_require(i915_reset_control(false));
 	hang = spin_sync(fd, 0, 0);
 
@@ -560,6 +588,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static int fd = -1;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] [PATCH i-g-t v2 1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset
@ 2018-03-29 13:05 ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-03-29 13:05 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Contexts executing when reset triggers are potentially corrupt so trying to
use them from a subsequent test (like the default context) can hang the
GPU or even the driver.

Work around that by always creating a dedicated context which will be
running when GPU reset happens.

v2:
 * Export and use gem_reopen_driver so the test works on old gens as well.
   (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 lib/i915/gem_submission.c | 11 +++++--
 lib/i915/gem_submission.h |  2 ++
 tests/gem_eio.c           | 77 ++++++++++++++++++++++++++++++++---------------
 3 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/lib/i915/gem_submission.c b/lib/i915/gem_submission.c
index 7d3cbdbf8e77..2fd460d5ed2b 100644
--- a/lib/i915/gem_submission.c
+++ b/lib/i915/gem_submission.c
@@ -165,7 +165,14 @@ bool gem_has_guc_submission(int fd)
 	return gem_submission_method(fd) & GEM_SUBMISSION_GUC;
 }
 
-static int reopen_driver(int fd)
+/**
+ * gem_reopen_driver:
+ * @fd: re-open the i915 drm file descriptor
+ *
+ * Re-opens the drm fd which is useful in instances where a clean default
+ * context is needed.
+ */
+int gem_reopen_driver(int fd)
 {
 	char path[256];
 
@@ -201,7 +208,7 @@ void gem_test_engine(int i915, unsigned int engine)
 		.buffer_count = 1,
 	};
 
-	i915 = reopen_driver(i915);
+	i915 = gem_reopen_driver(i915);
 	igt_assert(!is_wedged(i915));
 
 	obj.handle = gem_create(i915, 4096);
diff --git a/lib/i915/gem_submission.h b/lib/i915/gem_submission.h
index 6b39a0532295..f94eabb201b4 100644
--- a/lib/i915/gem_submission.h
+++ b/lib/i915/gem_submission.h
@@ -35,4 +35,6 @@ bool gem_has_guc_submission(int fd);
 
 void gem_test_engine(int fd, unsigned int engine);
 
+int gem_reopen_driver(int fd);
+
 #endif /* GEM_SUBMISSION_H */
diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b824d9d4c9c0..b7c5047f0816 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -255,6 +255,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 {
 	igt_spin_t *hang;
 
+	fd = gem_reopen_driver(fd);
 	igt_require_gem(fd);
 
 	/*
@@ -276,10 +277,14 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 	igt_require(i915_reset_control(true));
 
 	trigger_reset(fd);
+	close(fd);
 }
 
 static void test_suspend(int fd, int state)
 {
+	fd = gem_reopen_driver(fd);
+	igt_require_gem(fd);
+
 	/* Do a suspend first so that we don't skip inside the test */
 	igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
 
@@ -291,27 +296,32 @@ static void test_suspend(int fd, int state)
 
 	igt_require(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static void test_inflight(int fd, unsigned int wait)
 {
-	const uint32_t bbe = MI_BATCH_BUFFER_END;
-	struct drm_i915_gem_exec_object2 obj[2];
+	int parent_fd = fd;
 	unsigned int engine;
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 
-	memset(obj, 0, sizeof(obj));
-	obj[0].flags = EXEC_OBJECT_WRITE;
-	obj[1].handle = gem_create(fd, 4096);
-	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
-
-	for_each_engine(fd, engine) {
+	for_each_engine(parent_fd, engine) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj[2];
 		struct drm_i915_gem_execbuffer2 execbuf;
 		igt_spin_t *hang;
 		int fence[64]; /* conservative estimate of ring size */
 
+		fd = gem_reopen_driver(parent_fd);
+		igt_require_gem(fd);
+
+		memset(obj, 0, sizeof(obj));
+		obj[0].flags = EXEC_OBJECT_WRITE;
+		obj[1].handle = gem_create(fd, 4096);
+		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
 		gem_quiescent_gpu(fd);
 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
 		igt_require(i915_reset_control(false));
@@ -340,6 +350,9 @@ static void test_inflight(int fd, unsigned int wait)
 		igt_spin_batch_free(fd, hang);
 		igt_assert(i915_reset_control(true));
 		trigger_reset(fd);
+
+		gem_close(fd, obj[1].handle);
+		close(fd);
 	}
 }
 
@@ -351,6 +364,7 @@ static void test_inflight_suspend(int fd)
 	int fence[64]; /* conservative estimate of ring size */
 	igt_spin_t *hang;
 
+	fd = gem_reopen_driver(fd);
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 	igt_require(i915_reset_control(false));
@@ -387,6 +401,7 @@ static void test_inflight_suspend(int fd)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static uint32_t context_create_safe(int i915)
@@ -408,33 +423,37 @@ static uint32_t context_create_safe(int i915)
 
 static void test_inflight_contexts(int fd, unsigned int wait)
 {
-	struct drm_i915_gem_exec_object2 obj[2];
-	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	int parent_fd = fd;
 	unsigned int engine;
-	uint32_t ctx[64];
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 	gem_require_contexts(fd);
 
-	for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
-		ctx[n] = context_create_safe(fd);
-
-	memset(obj, 0, sizeof(obj));
-	obj[0].flags = EXEC_OBJECT_WRITE;
-	obj[1].handle = gem_create(fd, 4096);
-	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
-
-	for_each_engine(fd, engine) {
+	for_each_engine(parent_fd, engine) {
+		const uint32_t bbe = MI_BATCH_BUFFER_END;
+		struct drm_i915_gem_exec_object2 obj[2];
 		struct drm_i915_gem_execbuffer2 execbuf;
 		igt_spin_t *hang;
+		uint32_t ctx[64];
 		int fence[64];
 
+		fd = gem_reopen_driver(parent_fd);
+		igt_require_gem(fd);
+
+		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+			ctx[n] = context_create_safe(fd);
+
 		gem_quiescent_gpu(fd);
 
 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
 		igt_require(i915_reset_control(false));
 
+		memset(obj, 0, sizeof(obj));
+		obj[0].flags = EXEC_OBJECT_WRITE;
+		obj[1].handle = gem_create(fd, 4096);
+		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
 		hang = spin_sync(fd, 0, engine);
 		obj[0].handle = hang->handle;
 
@@ -458,12 +477,15 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 		}
 
 		igt_spin_batch_free(fd, hang);
+		gem_close(fd, obj[1].handle);
 		igt_assert(i915_reset_control(true));
 		trigger_reset(fd);
-	}
 
-	for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
-		gem_context_destroy(fd, ctx[n]);
+		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+			gem_context_destroy(fd, ctx[n]);
+
+		close(fd);
+	}
 }
 
 static void test_inflight_external(int fd)
@@ -478,6 +500,9 @@ static void test_inflight_external(int fd)
 	igt_require_sw_sync();
 	igt_require(gem_has_exec_fence(fd));
 
+	fd = gem_reopen_driver(fd);
+	igt_require_gem(fd);
+
 	fence = igt_cork_plug(&cork, fd);
 
 	igt_require(i915_reset_control(false));
@@ -514,6 +539,7 @@ static void test_inflight_external(int fd)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static void test_inflight_internal(int fd, unsigned int wait)
@@ -525,9 +551,11 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	int fences[16];
 	igt_spin_t *hang;
 
-	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 
+	fd = gem_reopen_driver(fd);
+	igt_require_gem(fd);
+
 	igt_require(i915_reset_control(false));
 	hang = spin_sync(fd, 0, 0);
 
@@ -560,6 +588,7 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+	close(fd);
 }
 
 static int fd = -1;
-- 
2.14.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH i-g-t 2/2] tests/gem_eio: Add context destroyer test
  2018-03-29 13:05 ` [igt-dev] " Tvrtko Ursulin
@ 2018-03-29 13:05   ` Tvrtko Ursulin
  -1 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-03-29 13:05 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Context destroyer is supposed to trigger wedging or resets at inconvenient
times and then re-use the context so either the context or driver tracking
might get confused and break.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/gem_eio.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b7c5047f0816..f3a452b2265c 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -591,6 +591,62 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	close(fd);
 }
 
+static void test_context_destroyer(int fd, unsigned int flags)
+{
+	uint32_t ctx0 = gem_context_create(fd);
+
+	igt_until_timeout(10) {
+		struct drm_i915_gem_execbuffer2 execbuf = { };
+		struct drm_i915_gem_exec_object2 obj = { };
+		uint32_t bbe = MI_BATCH_BUFFER_END;
+		igt_spin_t *hang;
+		unsigned int i;
+		uint32_t ctx;
+
+		gem_quiescent_gpu(fd);
+
+		igt_require(i915_reset_control(flags & TEST_WEDGE ?
+					       false : true));
+
+		ctx = context_create_safe(fd);
+
+		hang = spin_sync(fd, ctx0, 0);
+
+		obj.handle = gem_create(fd, 4096);
+		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+		execbuf.buffers_ptr = to_user_pointer(&obj);
+		execbuf.buffer_count = 1;
+		execbuf.rsvd1 = ctx0;
+
+		for (i = 0; i < 10; i++)
+			gem_execbuf(fd, &execbuf);
+
+		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
+
+		igt_assert(i915_reset_control(true));
+		trigger_reset(fd);
+
+		gem_quiescent_gpu(fd);
+
+		execbuf.rsvd1 = ctx;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		execbuf.rsvd1 = ctx0;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		gem_sync(fd, obj.handle);
+		igt_spin_batch_free(fd, hang);
+		gem_context_destroy(fd, ctx);
+		gem_close(fd, obj.handle);
+
+	}
+
+	gem_context_destroy(fd, ctx0);
+}
+
 static int fd = -1;
 
 static void
@@ -635,6 +691,12 @@ igt_main
 	igt_subtest("in-flight-suspend")
 		test_inflight_suspend(fd);
 
+	igt_subtest("context-destroyer")
+		test_context_destroyer(fd, 0);
+
+	igt_subtest("context-destroyer-wedge")
+		test_context_destroyer(fd, TEST_WEDGE);
+
 	igt_subtest_group {
 		const struct {
 			unsigned int wait;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] [PATCH i-g-t 2/2] tests/gem_eio: Add context destroyer test
@ 2018-03-29 13:05   ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-03-29 13:05 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Context destroyer is supposed to trigger wedging or resets at inconvenient
times and then re-use the context so either the context or driver tracking
might get confused and break.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/gem_eio.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b7c5047f0816..f3a452b2265c 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -591,6 +591,62 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	close(fd);
 }
 
+static void test_context_destroyer(int fd, unsigned int flags)
+{
+	uint32_t ctx0 = gem_context_create(fd);
+
+	igt_until_timeout(10) {
+		struct drm_i915_gem_execbuffer2 execbuf = { };
+		struct drm_i915_gem_exec_object2 obj = { };
+		uint32_t bbe = MI_BATCH_BUFFER_END;
+		igt_spin_t *hang;
+		unsigned int i;
+		uint32_t ctx;
+
+		gem_quiescent_gpu(fd);
+
+		igt_require(i915_reset_control(flags & TEST_WEDGE ?
+					       false : true));
+
+		ctx = context_create_safe(fd);
+
+		hang = spin_sync(fd, ctx0, 0);
+
+		obj.handle = gem_create(fd, 4096);
+		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+		execbuf.buffers_ptr = to_user_pointer(&obj);
+		execbuf.buffer_count = 1;
+		execbuf.rsvd1 = ctx0;
+
+		for (i = 0; i < 10; i++)
+			gem_execbuf(fd, &execbuf);
+
+		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
+
+		igt_assert(i915_reset_control(true));
+		trigger_reset(fd);
+
+		gem_quiescent_gpu(fd);
+
+		execbuf.rsvd1 = ctx;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		execbuf.rsvd1 = ctx0;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		gem_sync(fd, obj.handle);
+		igt_spin_batch_free(fd, hang);
+		gem_context_destroy(fd, ctx);
+		gem_close(fd, obj.handle);
+
+	}
+
+	gem_context_destroy(fd, ctx0);
+}
+
 static int fd = -1;
 
 static void
@@ -635,6 +691,12 @@ igt_main
 	igt_subtest("in-flight-suspend")
 		test_inflight_suspend(fd);
 
+	igt_subtest("context-destroyer")
+		test_context_destroyer(fd, 0);
+
+	igt_subtest("context-destroyer-wedge")
+		test_context_destroyer(fd, TEST_WEDGE);
+
 	igt_subtest_group {
 		const struct {
 			unsigned int wait;
-- 
2.14.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset
  2018-03-29 13:05 ` [igt-dev] " Tvrtko Ursulin
  (?)
  (?)
@ 2018-03-29 17:50 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2018-03-29 17:50 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset
URL   : https://patchwork.freedesktop.org/series/40879/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
2cbd4ddf11b3eaf01f33d8bc2ad46411ec6c299a lib/igt_kms: Improve connector probing in igt_display_init(), v2.

with latest DRM-Tip kernel build CI_DRM_4008
befd0b655b91 drm-tip: 2018y-03m-29d-16h-19m-32s UTC integration manifest

Testlist changes:
+igt@gem_eio@context-destroyer
+igt@gem_eio@context-destroyer-wedge

---- Known issues:

Test debugfs_test:
        Subgroup read_all_entries:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713
Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_chamelium:
        Subgroup dp-crc-fast:
                pass       -> DMESG-FAIL (fi-kbl-7500u) fdo#103841
Test prime_vgem:
        Subgroup basic-fence-flip:
                fail       -> PASS       (fi-ilk-650) fdo#104008

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:430s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:382s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:538s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:296s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:512s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:525s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:507s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:410s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:559s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:511s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:585s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:421s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:324s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:550s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:406s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:421s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:474s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:437s
fi-kbl-7500u     total:285  pass:259  dwarn:1   dfail:1   fail:0   skip:24  time:470s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:465s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:510s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:663s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:440s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:533s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:509s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:496s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:430s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:448s
fi-snb-2520m     total:3    pass:2    dwarn:0   dfail:0   fail:0   skip:0  
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:405s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:517s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:485s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1209/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset
  2018-03-29 13:05 ` [igt-dev] " Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  (?)
@ 2018-03-29 22:14 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2018-03-29 22:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset
URL   : https://patchwork.freedesktop.org/series/40879/
State : success

== Summary ==

---- Possible new issues:

Test kms_frontbuffer_tracking:
        Subgroup psr-2p-scndscrn-cur-indfb-draw-mmap-gtt:
                fail       -> SKIP       (shard-snb)

---- Known issues:

Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-toggle:
                fail       -> PASS       (shard-hsw) fdo#102670
Test kms_flip:
        Subgroup 2x-dpms-vs-vblank-race-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#103060 +1
        Subgroup 2x-flip-vs-expired-vblank:
                fail       -> PASS       (shard-hsw) fdo#102887
        Subgroup plain-flip-fb-recreate-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#100368
Test kms_rotation_crc:
        Subgroup sprite-rotation-180:
                fail       -> PASS       (shard-snb) fdo#103925
Test perf:
        Subgroup blocking:
                fail       -> PASS       (shard-hsw) fdo#102252

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252

shard-apl        total:3497 pass:1833 dwarn:1   dfail:0   fail:7   skip:1655 time:12966s
shard-hsw        total:3497 pass:1782 dwarn:1   dfail:0   fail:4   skip:1709 time:11824s
shard-snb        total:3497 pass:1377 dwarn:1   dfail:0   fail:2   skip:2117 time:7051s
Blacklisted hosts:
shard-kbl        total:3424 pass:1920 dwarn:1   dfail:1   fail:6   skip:1495 time:9102s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1209/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-03-29 13:05   ` [igt-dev] " Tvrtko Ursulin
@ 2018-04-03 11:36     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 11:36 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reset and unwedge stress testing is supposed to trigger wedging or resets
at inconvenient times and then re-use the context so either the context or
driver tracking might get confused and break.

v2:
 * Renamed for more sensible naming.
 * Added some comments to explain what the test is doing. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/gem_eio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b7c5047f0816..9599e73db736 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	close(fd);
 }
 
+/*
+ * Verify that we can submit and execute work after unwedging the GPU.
+ */
+static void test_reset_stress(int fd, unsigned int flags)
+{
+	uint32_t ctx0 = gem_context_create(fd);
+
+	igt_until_timeout(5) {
+		struct drm_i915_gem_execbuffer2 execbuf = { };
+		struct drm_i915_gem_exec_object2 obj = { };
+		uint32_t bbe = MI_BATCH_BUFFER_END;
+		igt_spin_t *hang;
+		unsigned int i;
+		uint32_t ctx;
+
+		gem_quiescent_gpu(fd);
+
+		igt_require(i915_reset_control(flags & TEST_WEDGE ?
+					       false : true));
+
+		ctx = context_create_safe(fd);
+
+		/*
+		 * Start executing a spin batch with some queued batches
+		 * against a different context after it.
+		 */
+		hang = spin_sync(fd, ctx0, 0);
+
+		obj.handle = gem_create(fd, 4096);
+		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+		execbuf.buffers_ptr = to_user_pointer(&obj);
+		execbuf.buffer_count = 1;
+		execbuf.rsvd1 = ctx0;
+
+		for (i = 0; i < 10; i++)
+			gem_execbuf(fd, &execbuf);
+
+		/* Wedge after a small delay. */
+		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
+
+		/* Unwedge by forcing a reset. */
+		igt_assert(i915_reset_control(true));
+		trigger_reset(fd);
+
+		gem_quiescent_gpu(fd);
+
+		/*
+		 * Verify that we are able to submit work after unwedging from
+		 * both contexts.
+		 */
+		execbuf.rsvd1 = ctx;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		execbuf.rsvd1 = ctx0;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		gem_sync(fd, obj.handle);
+		igt_spin_batch_free(fd, hang);
+		gem_context_destroy(fd, ctx);
+		gem_close(fd, obj.handle);
+	}
+
+	gem_context_destroy(fd, ctx0);
+}
+
 static int fd = -1;
 
 static void
@@ -635,6 +703,12 @@ igt_main
 	igt_subtest("in-flight-suspend")
 		test_inflight_suspend(fd);
 
+	igt_subtest("reset-stress")
+		test_reset_stress(fd, 0);
+
+	igt_subtest("unwedge-stress")
+		test_reset_stress(fd, TEST_WEDGE);
+
 	igt_subtest_group {
 		const struct {
 			unsigned int wait;
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-03 11:36     ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-04-03 11:36 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reset and unwedge stress testing is supposed to trigger wedging or resets
at inconvenient times and then re-use the context so either the context or
driver tracking might get confused and break.

v2:
 * Renamed for more sensible naming.
 * Added some comments to explain what the test is doing. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/gem_eio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b7c5047f0816..9599e73db736 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	close(fd);
 }
 
+/*
+ * Verify that we can submit and execute work after unwedging the GPU.
+ */
+static void test_reset_stress(int fd, unsigned int flags)
+{
+	uint32_t ctx0 = gem_context_create(fd);
+
+	igt_until_timeout(5) {
+		struct drm_i915_gem_execbuffer2 execbuf = { };
+		struct drm_i915_gem_exec_object2 obj = { };
+		uint32_t bbe = MI_BATCH_BUFFER_END;
+		igt_spin_t *hang;
+		unsigned int i;
+		uint32_t ctx;
+
+		gem_quiescent_gpu(fd);
+
+		igt_require(i915_reset_control(flags & TEST_WEDGE ?
+					       false : true));
+
+		ctx = context_create_safe(fd);
+
+		/*
+		 * Start executing a spin batch with some queued batches
+		 * against a different context after it.
+		 */
+		hang = spin_sync(fd, ctx0, 0);
+
+		obj.handle = gem_create(fd, 4096);
+		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+		execbuf.buffers_ptr = to_user_pointer(&obj);
+		execbuf.buffer_count = 1;
+		execbuf.rsvd1 = ctx0;
+
+		for (i = 0; i < 10; i++)
+			gem_execbuf(fd, &execbuf);
+
+		/* Wedge after a small delay. */
+		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
+
+		/* Unwedge by forcing a reset. */
+		igt_assert(i915_reset_control(true));
+		trigger_reset(fd);
+
+		gem_quiescent_gpu(fd);
+
+		/*
+		 * Verify that we are able to submit work after unwedging from
+		 * both contexts.
+		 */
+		execbuf.rsvd1 = ctx;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		execbuf.rsvd1 = ctx0;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		gem_sync(fd, obj.handle);
+		igt_spin_batch_free(fd, hang);
+		gem_context_destroy(fd, ctx);
+		gem_close(fd, obj.handle);
+	}
+
+	gem_context_destroy(fd, ctx0);
+}
+
 static int fd = -1;
 
 static void
@@ -635,6 +703,12 @@ igt_main
 	igt_subtest("in-flight-suspend")
 		test_inflight_suspend(fd);
 
+	igt_subtest("reset-stress")
+		test_reset_stress(fd, 0);
+
+	igt_subtest("unwedge-stress")
+		test_reset_stress(fd, TEST_WEDGE);
+
 	igt_subtest_group {
 		const struct {
 			unsigned int wait;
-- 
2.14.1

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-04-03 11:36     ` [igt-dev] " Tvrtko Ursulin
@ 2018-04-03 11:51       ` Chris Wilson
  -1 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-04-03 11:51 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-03 12:36:44)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reset and unwedge stress testing is supposed to trigger wedging or resets
> at inconvenient times and then re-use the context so either the context or
> driver tracking might get confused and break.
> 
> v2:
>  * Renamed for more sensible naming.
>  * Added some comments to explain what the test is doing. (Chris Wilson)

You bring shame unto my tests with such beauty.

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-03 11:51       ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-04-03 11:51 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-03 12:36:44)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reset and unwedge stress testing is supposed to trigger wedging or resets
> at inconvenient times and then re-use the context so either the context or
> driver tracking might get confused and break.
> 
> v2:
>  * Renamed for more sensible naming.
>  * Added some comments to explain what the test is doing. (Chris Wilson)

You bring shame unto my tests with such beauty.

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset (rev2)
  2018-03-29 13:05 ` [igt-dev] " Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  (?)
@ 2018-04-03 13:39 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2018-04-03 13:39 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset (rev2)
URL   : https://patchwork.freedesktop.org/series/40879/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
da00bf83aba3b516922efa1f338381189461aa4a tests/kms_plane_scaling: fb height to be min 16 for NV12

with latest DRM-Tip kernel build CI_DRM_4016
9a423e97b1f6 drm-tip: 2018y-04m-03d-11h-50m-53s UTC integration manifest

Testlist changes:
+igt@gem_eio@reset-stress
+igt@gem_eio@unwedge-stress

fi-bdw-5557u     total:285  pass:264  dwarn:0   dfail:0   fail:0   skip:21  time:433s
fi-bdw-gvtdvm    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:285  pass:220  dwarn:1   dfail:0   fail:0   skip:64  time:381s
fi-bsw-n3050     total:285  pass:239  dwarn:0   dfail:0   fail:0   skip:46  time:542s
fi-bwr-2160      total:285  pass:180  dwarn:0   dfail:0   fail:0   skip:105 time:301s
fi-bxt-dsi       total:285  pass:255  dwarn:0   dfail:0   fail:0   skip:30  time:519s
fi-bxt-j4205     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:512s
fi-byt-j1900     total:285  pass:250  dwarn:0   dfail:0   fail:0   skip:35  time:525s
fi-byt-n2820     total:285  pass:246  dwarn:0   dfail:0   fail:0   skip:39  time:512s
fi-cfl-8700k     total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:409s
fi-cfl-s3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:563s
fi-cfl-u         total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:513s
fi-cnl-y3        total:285  pass:259  dwarn:0   dfail:0   fail:0   skip:26  time:587s
fi-elk-e7500     total:285  pass:225  dwarn:1   dfail:0   fail:0   skip:59  time:424s
fi-gdg-551       total:285  pass:176  dwarn:0   dfail:0   fail:1   skip:108 time:316s
fi-glk-1         total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-hsw-4770      total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:408s
fi-ilk-650       total:285  pass:225  dwarn:0   dfail:0   fail:0   skip:60  time:420s
fi-ivb-3520m     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:470s
fi-ivb-3770      total:285  pass:252  dwarn:0   dfail:0   fail:0   skip:33  time:430s
fi-kbl-7500u     total:285  pass:260  dwarn:1   dfail:0   fail:0   skip:24  time:479s
fi-kbl-7567u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:461s
fi-kbl-r         total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:511s
fi-pnv-d510      total:285  pass:219  dwarn:1   dfail:0   fail:0   skip:65  time:662s
fi-skl-6260u     total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:440s
fi-skl-6600u     total:285  pass:258  dwarn:0   dfail:0   fail:0   skip:27  time:534s
fi-skl-6700k2    total:285  pass:261  dwarn:0   dfail:0   fail:0   skip:24  time:504s
fi-skl-6770hq    total:285  pass:265  dwarn:0   dfail:0   fail:0   skip:20  time:497s
fi-skl-guc       total:285  pass:257  dwarn:0   dfail:0   fail:0   skip:28  time:432s
fi-skl-gvtdvm    total:285  pass:262  dwarn:0   dfail:0   fail:0   skip:23  time:443s
fi-snb-2520m     total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:592s
fi-snb-2600      total:285  pass:245  dwarn:0   dfail:0   fail:0   skip:40  time:399s
Blacklisted hosts:
fi-cnl-psr       total:285  pass:256  dwarn:3   dfail:0   fail:0   skip:26  time:513s
fi-glk-j4005     total:285  pass:256  dwarn:0   dfail:0   fail:0   skip:29  time:486s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1217/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset (rev2)
  2018-03-29 13:05 ` [igt-dev] " Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  (?)
@ 2018-04-03 15:26 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2018-04-03 15:26 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: igt-dev

== Series Details ==

Series: series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset (rev2)
URL   : https://patchwork.freedesktop.org/series/40879/
State : success

== Summary ==

---- Possible new issues:

Test kms_draw_crc:
        Subgroup draw-method-xrgb8888-mmap-wc-untiled:
                skip       -> PASS       (shard-snb)
Test kms_flip:
        Subgroup flip-vs-modeset-vs-hang:
                dmesg-warn -> PASS       (shard-hsw)
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-indfb-draw-mmap-cpu:
                skip       -> PASS       (shard-snb)
        Subgroup fbc-1p-primscrn-spr-indfb-move:
                skip       -> PASS       (shard-snb)
Test prime_vgem:
        Subgroup basic-fence-flip:
                skip       -> PASS       (shard-snb)
Test syncobj_wait:
        Subgroup multi-wait-for-submit-unsubmitted-signaled:
                incomplete -> PASS       (shard-snb)

---- Known issues:

Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-toggle:
                fail       -> PASS       (shard-hsw) fdo#102670 +1
Test kms_flip:
        Subgroup 2x-dpms-vs-vblank-race-interruptible:
                fail       -> PASS       (shard-hsw) fdo#103060
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (shard-hsw) fdo#100368
        Subgroup flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-hsw) fdo#102887
Test kms_plane_multiple:
        Subgroup atomic-pipe-a-tiling-x:
                pass       -> FAIL       (shard-snb) fdo#103166
Test kms_sysfs_edid_timing:
                pass       -> WARN       (shard-apl) fdo#100047
Test kms_vblank:
        Subgroup pipe-c-accuracy-idle:
                fail       -> PASS       (shard-hsw) fdo#102583

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
fdo#100047 https://bugs.freedesktop.org/show_bug.cgi?id=100047
fdo#102583 https://bugs.freedesktop.org/show_bug.cgi?id=102583

shard-apl        total:3498 pass:1834 dwarn:1   dfail:0   fail:7   skip:1655 time:12869s
shard-hsw        total:3498 pass:1782 dwarn:1   dfail:0   fail:4   skip:1710 time:11332s
shard-snb        total:3498 pass:1376 dwarn:1   dfail:0   fail:3   skip:2118 time:6964s
Blacklisted hosts:
shard-kbl        total:3498 pass:1961 dwarn:1   dfail:0   fail:7   skip:1529 time:9359s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1217/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-04-03 11:36     ` [igt-dev] " Tvrtko Ursulin
@ 2018-04-03 18:24       ` Antonio Argenziano
  -1 siblings, 0 replies; 22+ messages in thread
From: Antonio Argenziano @ 2018-04-03 18:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx



On 03/04/18 04:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reset and unwedge stress testing is supposed to trigger wedging or resets
> at inconvenient times and then re-use the context so either the context or
> driver tracking might get confused and break.
> 
> v2:
>   * Renamed for more sensible naming.
>   * Added some comments to explain what the test is doing. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/gem_eio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 74 insertions(+)
> 
> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> index b7c5047f0816..9599e73db736 100644
> --- a/tests/gem_eio.c
> +++ b/tests/gem_eio.c
> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, unsigned int wait)
>   	close(fd);
>   }
>   
> +/*
> + * Verify that we can submit and execute work after unwedging the GPU.
> + */
> +static void test_reset_stress(int fd, unsigned int flags)
> +{
> +	uint32_t ctx0 = gem_context_create(fd);
> +
> +	igt_until_timeout(5) {
> +		struct drm_i915_gem_execbuffer2 execbuf = { };
> +		struct drm_i915_gem_exec_object2 obj = { };
> +		uint32_t bbe = MI_BATCH_BUFFER_END;
> +		igt_spin_t *hang;
> +		unsigned int i;
> +		uint32_t ctx;
> +
> +		gem_quiescent_gpu(fd);
> +
> +		igt_require(i915_reset_control(flags & TEST_WEDGE ?
> +					       false : true));
> +
> +		ctx = context_create_safe(fd);
> +
> +		/*
> +		 * Start executing a spin batch with some queued batches
> +		 * against a different context after it.
> +		 */

Aren't all batches queued on ctx0? Or is this a reference to the check 
on ctx you have later in the test.

Thanks,
Antonio

> +		hang = spin_sync(fd, ctx0, 0);
> +
> +		obj.handle = gem_create(fd, 4096);
> +		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		execbuf.buffers_ptr = to_user_pointer(&obj);
> +		execbuf.buffer_count = 1;
> +		execbuf.rsvd1 = ctx0;
> +
> +		for (i = 0; i < 10; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		/* Wedge after a small delay. */
> +		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
> +
> +		/* Unwedge by forcing a reset. */
> +		igt_assert(i915_reset_control(true));
> +		trigger_reset(fd);
> +
> +		gem_quiescent_gpu(fd);
> +
> +		/*
> +		 * Verify that we are able to submit work after unwedging from
> +		 * both contexts.
> +		 */
> +		execbuf.rsvd1 = ctx;
> +		for (i = 0; i < 5; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		execbuf.rsvd1 = ctx0;
> +		for (i = 0; i < 5; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		gem_sync(fd, obj.handle);
> +		igt_spin_batch_free(fd, hang);
> +		gem_context_destroy(fd, ctx);
> +		gem_close(fd, obj.handle);
> +	}
> +
> +	gem_context_destroy(fd, ctx0);
> +}
> +
>   static int fd = -1;
>   
>   static void
> @@ -635,6 +703,12 @@ igt_main
>   	igt_subtest("in-flight-suspend")
>   		test_inflight_suspend(fd);
>   
> +	igt_subtest("reset-stress")
> +		test_reset_stress(fd, 0);
> +
> +	igt_subtest("unwedge-stress")
> +		test_reset_stress(fd, TEST_WEDGE);
> +
>   	igt_subtest_group {
>   		const struct {
>   			unsigned int wait;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-03 18:24       ` Antonio Argenziano
  0 siblings, 0 replies; 22+ messages in thread
From: Antonio Argenziano @ 2018-04-03 18:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin



On 03/04/18 04:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reset and unwedge stress testing is supposed to trigger wedging or resets
> at inconvenient times and then re-use the context so either the context or
> driver tracking might get confused and break.
> 
> v2:
>   * Renamed for more sensible naming.
>   * Added some comments to explain what the test is doing. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/gem_eio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 74 insertions(+)
> 
> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> index b7c5047f0816..9599e73db736 100644
> --- a/tests/gem_eio.c
> +++ b/tests/gem_eio.c
> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, unsigned int wait)
>   	close(fd);
>   }
>   
> +/*
> + * Verify that we can submit and execute work after unwedging the GPU.
> + */
> +static void test_reset_stress(int fd, unsigned int flags)
> +{
> +	uint32_t ctx0 = gem_context_create(fd);
> +
> +	igt_until_timeout(5) {
> +		struct drm_i915_gem_execbuffer2 execbuf = { };
> +		struct drm_i915_gem_exec_object2 obj = { };
> +		uint32_t bbe = MI_BATCH_BUFFER_END;
> +		igt_spin_t *hang;
> +		unsigned int i;
> +		uint32_t ctx;
> +
> +		gem_quiescent_gpu(fd);
> +
> +		igt_require(i915_reset_control(flags & TEST_WEDGE ?
> +					       false : true));
> +
> +		ctx = context_create_safe(fd);
> +
> +		/*
> +		 * Start executing a spin batch with some queued batches
> +		 * against a different context after it.
> +		 */

Aren't all batches queued on ctx0? Or is this a reference to the check 
on ctx you have later in the test.

Thanks,
Antonio

> +		hang = spin_sync(fd, ctx0, 0);
> +
> +		obj.handle = gem_create(fd, 4096);
> +		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		execbuf.buffers_ptr = to_user_pointer(&obj);
> +		execbuf.buffer_count = 1;
> +		execbuf.rsvd1 = ctx0;
> +
> +		for (i = 0; i < 10; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		/* Wedge after a small delay. */
> +		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
> +
> +		/* Unwedge by forcing a reset. */
> +		igt_assert(i915_reset_control(true));
> +		trigger_reset(fd);
> +
> +		gem_quiescent_gpu(fd);
> +
> +		/*
> +		 * Verify that we are able to submit work after unwedging from
> +		 * both contexts.
> +		 */
> +		execbuf.rsvd1 = ctx;
> +		for (i = 0; i < 5; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		execbuf.rsvd1 = ctx0;
> +		for (i = 0; i < 5; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		gem_sync(fd, obj.handle);
> +		igt_spin_batch_free(fd, hang);
> +		gem_context_destroy(fd, ctx);
> +		gem_close(fd, obj.handle);
> +	}
> +
> +	gem_context_destroy(fd, ctx0);
> +}
> +
>   static int fd = -1;
>   
>   static void
> @@ -635,6 +703,12 @@ igt_main
>   	igt_subtest("in-flight-suspend")
>   		test_inflight_suspend(fd);
>   
> +	igt_subtest("reset-stress")
> +		test_reset_stress(fd, 0);
> +
> +	igt_subtest("unwedge-stress")
> +		test_reset_stress(fd, TEST_WEDGE);
> +
>   	igt_subtest_group {
>   		const struct {
>   			unsigned int wait;
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-04-03 18:24       ` Antonio Argenziano
@ 2018-04-03 18:34         ` Antonio Argenziano
  -1 siblings, 0 replies; 22+ messages in thread
From: Antonio Argenziano @ 2018-04-03 18:34 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx



On 03/04/18 11:24, Antonio Argenziano wrote:
> 
> 
> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Reset and unwedge stress testing is supposed to trigger wedging or resets
>> at inconvenient times and then re-use the context so either the context or
>> driver tracking might get confused and break.
>>
>> v2:
>>   * Renamed for more sensible naming.
>>   * Added some comments to explain what the test is doing. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/gem_eio.c | 74 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 74 insertions(+)
>>
>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>> index b7c5047f0816..9599e73db736 100644
>> --- a/tests/gem_eio.c
>> +++ b/tests/gem_eio.c
>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>> unsigned int wait)
>>       close(fd);
>>   }
>> +/*
>> + * Verify that we can submit and execute work after unwedging the GPU.
>> + */
>> +static void test_reset_stress(int fd, unsigned int flags)
>> +{
>> +    uint32_t ctx0 = gem_context_create(fd);
>> +
>> +    igt_until_timeout(5) {
>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>> +        struct drm_i915_gem_exec_object2 obj = { };
>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>> +        igt_spin_t *hang;
>> +        unsigned int i;
>> +        uint32_t ctx;
>> +
>> +        gem_quiescent_gpu(fd);
>> +
>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>> +                           false : true));
>> +
>> +        ctx = context_create_safe(fd);
>> +
>> +        /*
>> +         * Start executing a spin batch with some queued batches
>> +         * against a different context after it.
>> +         */
> 
> Aren't all batches queued on ctx0? Or is this a reference to the check 
> on ctx you have later in the test.
> 
> Thanks,
> Antonio
> 
>> +        hang = spin_sync(fd, ctx0, 0);

I think you meant to send this^ on ctx.

Antonio.

>> +
>> +        obj.handle = gem_create(fd, 4096);
>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>> +
>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>> +        execbuf.buffer_count = 1;
>> +        execbuf.rsvd1 = ctx0;
>> +
>> +        for (i = 0; i < 10; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        /* Wedge after a small delay. */
>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>> +
>> +        /* Unwedge by forcing a reset. */
>> +        igt_assert(i915_reset_control(true));
>> +        trigger_reset(fd);
>> +
>> +        gem_quiescent_gpu(fd);
>> +
>> +        /*
>> +         * Verify that we are able to submit work after unwedging from
>> +         * both contexts.
>> +         */
>> +        execbuf.rsvd1 = ctx;
>> +        for (i = 0; i < 5; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        execbuf.rsvd1 = ctx0;
>> +        for (i = 0; i < 5; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        gem_sync(fd, obj.handle);
>> +        igt_spin_batch_free(fd, hang);
>> +        gem_context_destroy(fd, ctx);
>> +        gem_close(fd, obj.handle);
>> +    }
>> +
>> +    gem_context_destroy(fd, ctx0);
>> +}
>> +
>>   static int fd = -1;
>>   static void
>> @@ -635,6 +703,12 @@ igt_main
>>       igt_subtest("in-flight-suspend")
>>           test_inflight_suspend(fd);
>> +    igt_subtest("reset-stress")
>> +        test_reset_stress(fd, 0);
>> +
>> +    igt_subtest("unwedge-stress")
>> +        test_reset_stress(fd, TEST_WEDGE);
>> +
>>       igt_subtest_group {
>>           const struct {
>>               unsigned int wait;
>>
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-03 18:34         ` Antonio Argenziano
  0 siblings, 0 replies; 22+ messages in thread
From: Antonio Argenziano @ 2018-04-03 18:34 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx, Tvrtko Ursulin



On 03/04/18 11:24, Antonio Argenziano wrote:
> 
> 
> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Reset and unwedge stress testing is supposed to trigger wedging or resets
>> at inconvenient times and then re-use the context so either the context or
>> driver tracking might get confused and break.
>>
>> v2:
>>   * Renamed for more sensible naming.
>>   * Added some comments to explain what the test is doing. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/gem_eio.c | 74 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 74 insertions(+)
>>
>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>> index b7c5047f0816..9599e73db736 100644
>> --- a/tests/gem_eio.c
>> +++ b/tests/gem_eio.c
>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>> unsigned int wait)
>>       close(fd);
>>   }
>> +/*
>> + * Verify that we can submit and execute work after unwedging the GPU.
>> + */
>> +static void test_reset_stress(int fd, unsigned int flags)
>> +{
>> +    uint32_t ctx0 = gem_context_create(fd);
>> +
>> +    igt_until_timeout(5) {
>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>> +        struct drm_i915_gem_exec_object2 obj = { };
>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>> +        igt_spin_t *hang;
>> +        unsigned int i;
>> +        uint32_t ctx;
>> +
>> +        gem_quiescent_gpu(fd);
>> +
>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>> +                           false : true));
>> +
>> +        ctx = context_create_safe(fd);
>> +
>> +        /*
>> +         * Start executing a spin batch with some queued batches
>> +         * against a different context after it.
>> +         */
> 
> Aren't all batches queued on ctx0? Or is this a reference to the check 
> on ctx you have later in the test.
> 
> Thanks,
> Antonio
> 
>> +        hang = spin_sync(fd, ctx0, 0);

I think you meant to send this^ on ctx.

Antonio.

>> +
>> +        obj.handle = gem_create(fd, 4096);
>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>> +
>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>> +        execbuf.buffer_count = 1;
>> +        execbuf.rsvd1 = ctx0;
>> +
>> +        for (i = 0; i < 10; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        /* Wedge after a small delay. */
>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>> +
>> +        /* Unwedge by forcing a reset. */
>> +        igt_assert(i915_reset_control(true));
>> +        trigger_reset(fd);
>> +
>> +        gem_quiescent_gpu(fd);
>> +
>> +        /*
>> +         * Verify that we are able to submit work after unwedging from
>> +         * both contexts.
>> +         */
>> +        execbuf.rsvd1 = ctx;
>> +        for (i = 0; i < 5; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        execbuf.rsvd1 = ctx0;
>> +        for (i = 0; i < 5; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        gem_sync(fd, obj.handle);
>> +        igt_spin_batch_free(fd, hang);
>> +        gem_context_destroy(fd, ctx);
>> +        gem_close(fd, obj.handle);
>> +    }
>> +
>> +    gem_context_destroy(fd, ctx0);
>> +}
>> +
>>   static int fd = -1;
>>   static void
>> @@ -635,6 +703,12 @@ igt_main
>>       igt_subtest("in-flight-suspend")
>>           test_inflight_suspend(fd);
>> +    igt_subtest("reset-stress")
>> +        test_reset_stress(fd, 0);
>> +
>> +    igt_subtest("unwedge-stress")
>> +        test_reset_stress(fd, TEST_WEDGE);
>> +
>>       igt_subtest_group {
>>           const struct {
>>               unsigned int wait;
>>
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-04-03 18:34         ` Antonio Argenziano
@ 2018-04-04  9:58           ` Tvrtko Ursulin
  -1 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-04-04  9:58 UTC (permalink / raw)
  To: Antonio Argenziano, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 03/04/2018 19:34, Antonio Argenziano wrote:
> 
> 
> On 03/04/18 11:24, Antonio Argenziano wrote:
>>
>>
>> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> Reset and unwedge stress testing is supposed to trigger wedging or 
>>> resets
>>> at inconvenient times and then re-use the context so either the 
>>> context or
>>> driver tracking might get confused and break.
>>>
>>> v2:
>>>   * Renamed for more sensible naming.
>>>   * Added some comments to explain what the test is doing. (Chris 
>>> Wilson)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>   tests/gem_eio.c | 74 
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 74 insertions(+)
>>>
>>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>>> index b7c5047f0816..9599e73db736 100644
>>> --- a/tests/gem_eio.c
>>> +++ b/tests/gem_eio.c
>>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>>> unsigned int wait)
>>>       close(fd);
>>>   }
>>> +/*
>>> + * Verify that we can submit and execute work after unwedging the GPU.
>>> + */
>>> +static void test_reset_stress(int fd, unsigned int flags)
>>> +{
>>> +    uint32_t ctx0 = gem_context_create(fd);
>>> +
>>> +    igt_until_timeout(5) {
>>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>>> +        struct drm_i915_gem_exec_object2 obj = { };
>>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +        igt_spin_t *hang;
>>> +        unsigned int i;
>>> +        uint32_t ctx;
>>> +
>>> +        gem_quiescent_gpu(fd);
>>> +
>>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>>> +                           false : true));
>>> +
>>> +        ctx = context_create_safe(fd);
>>> +
>>> +        /*
>>> +         * Start executing a spin batch with some queued batches
>>> +         * against a different context after it.
>>> +         */
>>
>> Aren't all batches queued on ctx0? Or is this a reference to the check 
>> on ctx you have later in the test.

Yes, that is a mistake in the comment text.

>>> +        hang = spin_sync(fd, ctx0, 0);
> 
> I think you meant to send this^ on ctx.

Why do you think so? Did you find a different or better way to trigger 
the bug this test is trying to hit?

Regards,

Tvrtko

> Antonio.
> 
>>> +
>>> +        obj.handle = gem_create(fd, 4096);
>>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>>> +        execbuf.buffer_count = 1;
>>> +        execbuf.rsvd1 = ctx0;
>>> +
>>> +        for (i = 0; i < 10; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        /* Wedge after a small delay. */
>>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>>> +
>>> +        /* Unwedge by forcing a reset. */
>>> +        igt_assert(i915_reset_control(true));
>>> +        trigger_reset(fd);
>>> +
>>> +        gem_quiescent_gpu(fd);
>>> +
>>> +        /*
>>> +         * Verify that we are able to submit work after unwedging from
>>> +         * both contexts.
>>> +         */
>>> +        execbuf.rsvd1 = ctx;
>>> +        for (i = 0; i < 5; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        execbuf.rsvd1 = ctx0;
>>> +        for (i = 0; i < 5; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        gem_sync(fd, obj.handle);
>>> +        igt_spin_batch_free(fd, hang);
>>> +        gem_context_destroy(fd, ctx);
>>> +        gem_close(fd, obj.handle);
>>> +    }
>>> +
>>> +    gem_context_destroy(fd, ctx0);
>>> +}
>>> +
>>>   static int fd = -1;
>>>   static void
>>> @@ -635,6 +703,12 @@ igt_main
>>>       igt_subtest("in-flight-suspend")
>>>           test_inflight_suspend(fd);
>>> +    igt_subtest("reset-stress")
>>> +        test_reset_stress(fd, 0);
>>> +
>>> +    igt_subtest("unwedge-stress")
>>> +        test_reset_stress(fd, TEST_WEDGE);
>>> +
>>>       igt_subtest_group {
>>>           const struct {
>>>               unsigned int wait;
>>>
>> _______________________________________________
>> igt-dev mailing list
>> igt-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-04  9:58           ` Tvrtko Ursulin
  0 siblings, 0 replies; 22+ messages in thread
From: Tvrtko Ursulin @ 2018-04-04  9:58 UTC (permalink / raw)
  To: Antonio Argenziano, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx


On 03/04/2018 19:34, Antonio Argenziano wrote:
> 
> 
> On 03/04/18 11:24, Antonio Argenziano wrote:
>>
>>
>> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> Reset and unwedge stress testing is supposed to trigger wedging or 
>>> resets
>>> at inconvenient times and then re-use the context so either the 
>>> context or
>>> driver tracking might get confused and break.
>>>
>>> v2:
>>>   * Renamed for more sensible naming.
>>>   * Added some comments to explain what the test is doing. (Chris 
>>> Wilson)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>   tests/gem_eio.c | 74 
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 74 insertions(+)
>>>
>>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>>> index b7c5047f0816..9599e73db736 100644
>>> --- a/tests/gem_eio.c
>>> +++ b/tests/gem_eio.c
>>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>>> unsigned int wait)
>>>       close(fd);
>>>   }
>>> +/*
>>> + * Verify that we can submit and execute work after unwedging the GPU.
>>> + */
>>> +static void test_reset_stress(int fd, unsigned int flags)
>>> +{
>>> +    uint32_t ctx0 = gem_context_create(fd);
>>> +
>>> +    igt_until_timeout(5) {
>>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>>> +        struct drm_i915_gem_exec_object2 obj = { };
>>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +        igt_spin_t *hang;
>>> +        unsigned int i;
>>> +        uint32_t ctx;
>>> +
>>> +        gem_quiescent_gpu(fd);
>>> +
>>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>>> +                           false : true));
>>> +
>>> +        ctx = context_create_safe(fd);
>>> +
>>> +        /*
>>> +         * Start executing a spin batch with some queued batches
>>> +         * against a different context after it.
>>> +         */
>>
>> Aren't all batches queued on ctx0? Or is this a reference to the check 
>> on ctx you have later in the test.

Yes, that is a mistake in the comment text.

>>> +        hang = spin_sync(fd, ctx0, 0);
> 
> I think you meant to send this^ on ctx.

Why do you think so? Did you find a different or better way to trigger 
the bug this test is trying to hit?

Regards,

Tvrtko

> Antonio.
> 
>>> +
>>> +        obj.handle = gem_create(fd, 4096);
>>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>>> +        execbuf.buffer_count = 1;
>>> +        execbuf.rsvd1 = ctx0;
>>> +
>>> +        for (i = 0; i < 10; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        /* Wedge after a small delay. */
>>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>>> +
>>> +        /* Unwedge by forcing a reset. */
>>> +        igt_assert(i915_reset_control(true));
>>> +        trigger_reset(fd);
>>> +
>>> +        gem_quiescent_gpu(fd);
>>> +
>>> +        /*
>>> +         * Verify that we are able to submit work after unwedging from
>>> +         * both contexts.
>>> +         */
>>> +        execbuf.rsvd1 = ctx;
>>> +        for (i = 0; i < 5; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        execbuf.rsvd1 = ctx0;
>>> +        for (i = 0; i < 5; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        gem_sync(fd, obj.handle);
>>> +        igt_spin_batch_free(fd, hang);
>>> +        gem_context_destroy(fd, ctx);
>>> +        gem_close(fd, obj.handle);
>>> +    }
>>> +
>>> +    gem_context_destroy(fd, ctx0);
>>> +}
>>> +
>>>   static int fd = -1;
>>>   static void
>>> @@ -635,6 +703,12 @@ igt_main
>>>       igt_subtest("in-flight-suspend")
>>>           test_inflight_suspend(fd);
>>> +    igt_subtest("reset-stress")
>>> +        test_reset_stress(fd, 0);
>>> +
>>> +    igt_subtest("unwedge-stress")
>>> +        test_reset_stress(fd, TEST_WEDGE);
>>> +
>>>       igt_subtest_group {
>>>           const struct {
>>>               unsigned int wait;
>>>
>> _______________________________________________
>> igt-dev mailing list
>> igt-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-04-04  9:58           ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-04 10:06             ` Chris Wilson
  -1 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-04-04 10:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, Antonio Argenziano, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-04 10:58:14)
> 
> On 03/04/2018 19:34, Antonio Argenziano wrote:
> > 
> > 
> > On 03/04/18 11:24, Antonio Argenziano wrote:
> >>
> >>
> >> On 03/04/18 04:36, Tvrtko Ursulin wrote:
> >>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>> Reset and unwedge stress testing is supposed to trigger wedging or 
> >>> resets
> >>> at inconvenient times and then re-use the context so either the 
> >>> context or
> >>> driver tracking might get confused and break.
> >>>
> >>> v2:
> >>>   * Renamed for more sensible naming.
> >>>   * Added some comments to explain what the test is doing. (Chris 
> >>> Wilson)
> >>>
> >>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>   tests/gem_eio.c | 74 
> >>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>   1 file changed, 74 insertions(+)
> >>>
> >>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> >>> index b7c5047f0816..9599e73db736 100644
> >>> --- a/tests/gem_eio.c
> >>> +++ b/tests/gem_eio.c
> >>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
> >>> unsigned int wait)
> >>>       close(fd);
> >>>   }
> >>> +/*
> >>> + * Verify that we can submit and execute work after unwedging the GPU.
> >>> + */
> >>> +static void test_reset_stress(int fd, unsigned int flags)
> >>> +{
> >>> +    uint32_t ctx0 = gem_context_create(fd);
> >>> +
> >>> +    igt_until_timeout(5) {
> >>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
> >>> +        struct drm_i915_gem_exec_object2 obj = { };
> >>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
> >>> +        igt_spin_t *hang;
> >>> +        unsigned int i;
> >>> +        uint32_t ctx;
> >>> +
> >>> +        gem_quiescent_gpu(fd);
> >>> +
> >>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
> >>> +                           false : true));
> >>> +
> >>> +        ctx = context_create_safe(fd);
> >>> +
> >>> +        /*
> >>> +         * Start executing a spin batch with some queued batches
> >>> +         * against a different context after it.
> >>> +         */
> >>
> >> Aren't all batches queued on ctx0? Or is this a reference to the check 
> >> on ctx you have later in the test.
> 
> Yes, a mistake in comment text.
> 
> >>> +        hang = spin_sync(fd, ctx0, 0);
> > 
> > I think you meant to send this^ on ctx.
> 
> Why do you think so? Did you find a different or better way to trigger 
> the bug this test is trying to hit?

You might need to explain that this test was trying to reproduce a
kernel bug around unwedging you found earlier, and instead managed to
find a similar one. ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-04 10:06             ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2018-04-04 10:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, Antonio Argenziano, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx

Quoting Tvrtko Ursulin (2018-04-04 10:58:14)
> 
> On 03/04/2018 19:34, Antonio Argenziano wrote:
> > 
> > 
> > On 03/04/18 11:24, Antonio Argenziano wrote:
> >>
> >>
> >> On 03/04/18 04:36, Tvrtko Ursulin wrote:
> >>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>> Reset and unwedge stress testing is supposed to trigger wedging or 
> >>> resets
> >>> at inconvenient times and then re-use the context so either the 
> >>> context or
> >>> driver tracking might get confused and break.
> >>>
> >>> v2:
> >>>   * Renamed for more sensible naming.
> >>>   * Added some comments to explain what the test is doing. (Chris 
> >>> Wilson)
> >>>
> >>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>   tests/gem_eio.c | 74 
> >>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>   1 file changed, 74 insertions(+)
> >>>
> >>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> >>> index b7c5047f0816..9599e73db736 100644
> >>> --- a/tests/gem_eio.c
> >>> +++ b/tests/gem_eio.c
> >>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
> >>> unsigned int wait)
> >>>       close(fd);
> >>>   }
> >>> +/*
> >>> + * Verify that we can submit and execute work after unwedging the GPU.
> >>> + */
> >>> +static void test_reset_stress(int fd, unsigned int flags)
> >>> +{
> >>> +    uint32_t ctx0 = gem_context_create(fd);
> >>> +
> >>> +    igt_until_timeout(5) {
> >>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
> >>> +        struct drm_i915_gem_exec_object2 obj = { };
> >>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
> >>> +        igt_spin_t *hang;
> >>> +        unsigned int i;
> >>> +        uint32_t ctx;
> >>> +
> >>> +        gem_quiescent_gpu(fd);
> >>> +
> >>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
> >>> +                           false : true));
> >>> +
> >>> +        ctx = context_create_safe(fd);
> >>> +
> >>> +        /*
> >>> +         * Start executing a spin batch with some queued batches
> >>> +         * against a different context after it.
> >>> +         */
> >>
> >> Aren't all batches queued on ctx0? Or is this a reference to the check 
> >> on ctx you have later in the test.
> 
> Yes, a mistake in comment text.
> 
> >>> +        hang = spin_sync(fd, ctx0, 0);
> > 
> > I think you meant to send this^ on ctx.
> 
> Why do you think so? Did you find a different or better way to trigger 
> the bug this test is trying to hit?

You might need to explain that this test was trying to reproduce a
kernel bug around unwedging you found earlier, and instead managed to
find a similar one. ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
  2018-04-04  9:58           ` [Intel-gfx] " Tvrtko Ursulin
@ 2018-04-04 16:54             ` Antonio Argenziano
  -1 siblings, 0 replies; 22+ messages in thread
From: Antonio Argenziano @ 2018-04-04 16:54 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx



On 04/04/18 02:58, Tvrtko Ursulin wrote:
> 
> On 03/04/2018 19:34, Antonio Argenziano wrote:
>>
>>
>> On 03/04/18 11:24, Antonio Argenziano wrote:
>>>
>>>
>>> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Reset and unwedge stress testing is supposed to trigger wedging or 
>>>> resets
>>>> at inconvenient times and then re-use the context so either the 
>>>> context or
>>>> driver tracking might get confused and break.
>>>>
>>>> v2:
>>>>   * Renamed for more sensible naming.
>>>>   * Added some comments to explain what the test is doing. (Chris 
>>>> Wilson)
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> ---
>>>>   tests/gem_eio.c | 74 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 74 insertions(+)
>>>>
>>>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>>>> index b7c5047f0816..9599e73db736 100644
>>>> --- a/tests/gem_eio.c
>>>> +++ b/tests/gem_eio.c
>>>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>>>> unsigned int wait)
>>>>       close(fd);
>>>>   }
>>>> +/*
>>>> + * Verify that we can submit and execute work after unwedging the GPU.
>>>> + */
>>>> +static void test_reset_stress(int fd, unsigned int flags)
>>>> +{
>>>> +    uint32_t ctx0 = gem_context_create(fd);
>>>> +
>>>> +    igt_until_timeout(5) {
>>>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>>>> +        struct drm_i915_gem_exec_object2 obj = { };
>>>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>>>> +        igt_spin_t *hang;
>>>> +        unsigned int i;
>>>> +        uint32_t ctx;
>>>> +
>>>> +        gem_quiescent_gpu(fd);
>>>> +
>>>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>>>> +                           false : true));
>>>> +
>>>> +        ctx = context_create_safe(fd);
>>>> +
>>>> +        /*
>>>> +         * Start executing a spin batch with some queued batches
>>>> +         * against a different context after it.
>>>> +         */
>>>
>>> Aren't all batches queued on ctx0? Or is this a reference to the 
>>> check on ctx you have later in the test.
> 
> Yes, a mistake in comment text.
> 
>>>> +        hang = spin_sync(fd, ctx0, 0);
>>
>> I think you meant to send this^ on ctx.
> 
> Why do you think so? Did you find a different or better way to trigger 
> the bug this test is trying to hit?

Nope, I just misunderstood the code :). I thought you were creating ctx 
as 'safe' so that it would not be 'bannable', because you were going to 
reuse the same context across multiple resets and didn't want it to be 
banned. BTW, given that this is not the case, wouldn't ctx0 be banned 
after so many resets?

Apologies for the cryptic comment,
Antonio.

> 
> Regards,
> 
> Tvrtko
> 
>> Antonio.
>>
>>>> +
>>>> +        obj.handle = gem_create(fd, 4096);
>>>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>>>> +
>>>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>>>> +        execbuf.buffer_count = 1;
>>>> +        execbuf.rsvd1 = ctx0;
>>>> +
>>>> +        for (i = 0; i < 10; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        /* Wedge after a small delay. */
>>>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>>>> +
>>>> +        /* Unwedge by forcing a reset. */
>>>> +        igt_assert(i915_reset_control(true));
>>>> +        trigger_reset(fd);
>>>> +
>>>> +        gem_quiescent_gpu(fd);
>>>> +
>>>> +        /*
>>>> +         * Verify that we are able to submit work after unwedging from
>>>> +         * both contexts.
>>>> +         */
>>>> +        execbuf.rsvd1 = ctx;
>>>> +        for (i = 0; i < 5; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        execbuf.rsvd1 = ctx0;
>>>> +        for (i = 0; i < 5; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        gem_sync(fd, obj.handle);
>>>> +        igt_spin_batch_free(fd, hang);
>>>> +        gem_context_destroy(fd, ctx);
>>>> +        gem_close(fd, obj.handle);
>>>> +    }
>>>> +
>>>> +    gem_context_destroy(fd, ctx0);
>>>> +}
>>>> +
>>>>   static int fd = -1;
>>>>   static void
>>>> @@ -635,6 +703,12 @@ igt_main
>>>>       igt_subtest("in-flight-suspend")
>>>>           test_inflight_suspend(fd);
>>>> +    igt_subtest("reset-stress")
>>>> +        test_reset_stress(fd, 0);
>>>> +
>>>> +    igt_subtest("unwedge-stress")
>>>> +        test_reset_stress(fd, TEST_WEDGE);
>>>> +
>>>>       igt_subtest_group {
>>>>           const struct {
>>>>               unsigned int wait;
>>>>
>>> _______________________________________________
>>> igt-dev mailing list
>>> igt-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
>> _______________________________________________
>> igt-dev mailing list
>> igt-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [Intel-gfx] [igt-dev] [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing
@ 2018-04-04 16:54             ` Antonio Argenziano
  0 siblings, 0 replies; 22+ messages in thread
From: Antonio Argenziano @ 2018-04-04 16:54 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, igt-dev; +Cc: Intel-gfx



On 04/04/18 02:58, Tvrtko Ursulin wrote:
> 
> On 03/04/2018 19:34, Antonio Argenziano wrote:
>>
>>
>> On 03/04/18 11:24, Antonio Argenziano wrote:
>>>
>>>
>>> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Reset and unwedge stress testing is supposed to trigger wedging or 
>>>> resets
>>>> at inconvenient times and then re-use the context so either the 
>>>> context or
>>>> driver tracking might get confused and break.
>>>>
>>>> v2:
>>>>   * Renamed for more sensible naming.
>>>>   * Added some comments to explain what the test is doing. (Chris 
>>>> Wilson)
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> ---
>>>>   tests/gem_eio.c | 74 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 74 insertions(+)
>>>>
>>>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>>>> index b7c5047f0816..9599e73db736 100644
>>>> --- a/tests/gem_eio.c
>>>> +++ b/tests/gem_eio.c
>>>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>>>> unsigned int wait)
>>>>       close(fd);
>>>>   }
>>>> +/*
>>>> + * Verify that we can submit and execute work after unwedging the GPU.
>>>> + */
>>>> +static void test_reset_stress(int fd, unsigned int flags)
>>>> +{
>>>> +    uint32_t ctx0 = gem_context_create(fd);
>>>> +
>>>> +    igt_until_timeout(5) {
>>>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>>>> +        struct drm_i915_gem_exec_object2 obj = { };
>>>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>>>> +        igt_spin_t *hang;
>>>> +        unsigned int i;
>>>> +        uint32_t ctx;
>>>> +
>>>> +        gem_quiescent_gpu(fd);
>>>> +
>>>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>>>> +                           false : true));
>>>> +
>>>> +        ctx = context_create_safe(fd);
>>>> +
>>>> +        /*
>>>> +         * Start executing a spin batch with some queued batches
>>>> +         * against a different context after it.
>>>> +         */
>>>
>>> Aren't all batches queued on ctx0? Or is this a reference to the 
>>> check on ctx you have later in the test.
> 
> Yes, a mistake in comment text.
> 
>>>> +        hang = spin_sync(fd, ctx0, 0);
>>
>> I think you meant to send this^ on ctx.
> 
> Why do you think so? Did you find a different or better way to trigger 
> the bug this test is trying to hit?

Nope, I just misunderstood the code :). I thought you were creating ctx 
as 'safe' so that it would not be 'bannable', because you were going to 
reuse the same context across multiple resets and didn't want it to be 
banned. BTW, given that this is not the case, wouldn't ctx0 be banned 
after so many resets?

Apologies for the cryptic comment,
Antonio.

> 
> Regards,
> 
> Tvrtko
> 
>> Antonio.
>>
>>>> +
>>>> +        obj.handle = gem_create(fd, 4096);
>>>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>>>> +
>>>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>>>> +        execbuf.buffer_count = 1;
>>>> +        execbuf.rsvd1 = ctx0;
>>>> +
>>>> +        for (i = 0; i < 10; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        /* Wedge after a small delay. */
>>>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>>>> +
>>>> +        /* Unwedge by forcing a reset. */
>>>> +        igt_assert(i915_reset_control(true));
>>>> +        trigger_reset(fd);
>>>> +
>>>> +        gem_quiescent_gpu(fd);
>>>> +
>>>> +        /*
>>>> +         * Verify that we are able to submit work after unwedging from
>>>> +         * both contexts.
>>>> +         */
>>>> +        execbuf.rsvd1 = ctx;
>>>> +        for (i = 0; i < 5; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        execbuf.rsvd1 = ctx0;
>>>> +        for (i = 0; i < 5; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        gem_sync(fd, obj.handle);
>>>> +        igt_spin_batch_free(fd, hang);
>>>> +        gem_context_destroy(fd, ctx);
>>>> +        gem_close(fd, obj.handle);
>>>> +    }
>>>> +
>>>> +    gem_context_destroy(fd, ctx0);
>>>> +}
>>>> +
>>>>   static int fd = -1;
>>>>   static void
>>>> @@ -635,6 +703,12 @@ igt_main
>>>>       igt_subtest("in-flight-suspend")
>>>>           test_inflight_suspend(fd);
>>>> +    igt_subtest("reset-stress")
>>>> +        test_reset_stress(fd, 0);
>>>> +
>>>> +    igt_subtest("unwedge-stress")
>>>> +        test_reset_stress(fd, TEST_WEDGE);
>>>> +
>>>>       igt_subtest_group {
>>>>           const struct {
>>>>               unsigned int wait;
>>>>
>>> _______________________________________________
>>> igt-dev mailing list
>>> igt-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
>> _______________________________________________
>> igt-dev mailing list
>> igt-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-04-04 16:54 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-29 13:05 [PATCH i-g-t v2 1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset Tvrtko Ursulin
2018-03-29 13:05 ` [igt-dev] " Tvrtko Ursulin
2018-03-29 13:05 ` [PATCH i-g-t 2/2] tests/gem_eio: Add context destroyer test Tvrtko Ursulin
2018-03-29 13:05   ` [igt-dev] " Tvrtko Ursulin
2018-04-03 11:36   ` [PATCH i-g-t v2 2/2] tests/gem_eio: Add reset and unwedge stress testing Tvrtko Ursulin
2018-04-03 11:36     ` [igt-dev] " Tvrtko Ursulin
2018-04-03 11:51     ` Chris Wilson
2018-04-03 11:51       ` [Intel-gfx] " Chris Wilson
2018-04-03 18:24     ` [igt-dev] " Antonio Argenziano
2018-04-03 18:24       ` Antonio Argenziano
2018-04-03 18:34       ` Antonio Argenziano
2018-04-03 18:34         ` Antonio Argenziano
2018-04-04  9:58         ` Tvrtko Ursulin
2018-04-04  9:58           ` [Intel-gfx] " Tvrtko Ursulin
2018-04-04 10:06           ` Chris Wilson
2018-04-04 10:06             ` [Intel-gfx] " Chris Wilson
2018-04-04 16:54           ` Antonio Argenziano
2018-04-04 16:54             ` [Intel-gfx] " Antonio Argenziano
2018-03-29 17:50 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset Patchwork
2018-03-29 22:14 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2018-04-03 13:39 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,v2,1/2] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset (rev2) Patchwork
2018-04-03 15:26 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.