All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump
@ 2014-04-11 12:59 oscar.mateo
  2014-04-11 13:09 ` Mateo Lozano, Oscar
  2014-04-11 16:48 ` [PATCH v2] " oscar.mateo
  0 siblings, 2 replies; 15+ messages in thread
From: oscar.mateo @ 2014-04-11 12:59 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 tests/.gitignore          |   1 +
 tests/Makefile.sources    |   1 +
 tests/gem_error_capture.c | 230 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 232 insertions(+)
 create mode 100644 tests/gem_error_capture.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 146bab0..945574c 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -27,6 +27,7 @@ gem_ctx_create
 gem_ctx_exec
 gem_double_irq_loop
 gem_dummy_reloc_loop
+gem_error_capture
 gem_evict_alignment
 gem_evict_everything
 gem_exec_bad_domains
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bf02a48..612beb6 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs_M = \
 	gem_ctx_bad_exec \
 	gem_ctx_exec \
 	gem_dummy_reloc_loop \
+	gem_error_capture \
 	gem_evict_alignment \
 	gem_evict_everything \
 	gem_exec_bad_domains \
diff --git a/tests/gem_error_capture.c b/tests/gem_error_capture.c
new file mode 100644
index 0000000..bbf0f5d
--- /dev/null
+++ b/tests/gem_error_capture.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/*
+ * Testcase: Check whether basic error state capture/dump mechanism is correctly
+ * working for all rings.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_io.h"
+#include "igt_debugfs.h"
+
+#define MAGIC_NUMBER 0x10001
+uint32_t batch[4] = {MI_NOOP, MI_BATCH_BUFFER_END, MAGIC_NUMBER, MAGIC_NUMBER};
+
+static void stop_rings(void)
+{
+	int fd;
+	static const char buf[] = "0xf";
+
+	fd = igt_debugfs_open("i915_ring_stop", O_WRONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
+
+	close(fd);
+}
+
+static bool rings_stopped(void)
+{
+	int fd;
+	static char buf[128];
+	unsigned long long val;
+
+	fd = igt_debugfs_open("i915_ring_stop", O_RDONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(read(fd, buf, sizeof(buf)) > 0);
+	close(fd);
+
+	sscanf(buf, "%llx", &val);
+
+	return (bool)val;
+}
+
+static void clear_error_state(void)
+{
+	int fd;
+	static const char buf[] = "";
+
+	fd = igt_debugfs_open("i915_error_state", O_WRONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
+	close(fd);
+}
+
+static void check_bb_contents(char **line, size_t *line_size,
+		FILE *file, int pos, uint32_t expected_value)
+{
+	char expected_line[32];
+
+	igt_assert(getline(line, line_size, file) > 0);
+
+	snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
+			4*pos, expected_value);
+
+	igt_assert(strstr(*line, expected_line));
+}
+
+static void check_error_state(const char *expected_ring_name,
+				uint64_t expected_offset)
+{
+	FILE *file;
+	int debug_fd;
+	char *line = NULL;
+	size_t line_size;
+	char *dashes = NULL;
+	char *ring_name = NULL;
+	uint32_t gtt_offset = 0;
+	bool matched;
+	int i;
+
+	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
+	igt_assert(debug_fd >= 0);
+	file = fdopen(debug_fd, "r");
+
+	while (getline(&line, &line_size, file) > 0) {
+		dashes = strstr(line, "---");
+		if (dashes) {
+			ring_name = realloc(ring_name, dashes - line);
+			strncpy(ring_name, line, dashes - line);
+			ring_name[dashes - line - 1] = '\0';
+
+			matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
+					&gtt_offset);
+			if (matched) {
+				igt_assert(strstr(ring_name, expected_ring_name));
+				igt_assert(gtt_offset == expected_offset);
+
+				for (i = 0; i < sizeof(batch) / 4; i++)
+					check_bb_contents(&line, &line_size,
+							file, i, batch[i]);
+
+				break;
+			}
+		}
+	}
+
+	igt_assert(matched);
+
+	free(line);
+	free(ring_name);
+
+	close(debug_fd);
+}
+
+static void test(int fd, uint32_t handle, unsigned ring_id, const char *ring_name)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec;
+	uint64_t presumed_offset;
+
+	gem_require_ring(fd, ring_id);
+
+	clear_error_state();
+
+	exec.handle = handle;
+	exec.relocation_count = 0;
+	exec.relocs_ptr = 0;
+	exec.alignment = 0;
+	exec.offset = 0;
+	exec.flags = 0;
+	exec.rsvd1 = 0;
+	exec.rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)&exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 16;
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = ring_id;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+	execbuf.rsvd2 = 0;
+
+	gem_execbuf(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	presumed_offset = exec.offset;
+
+	stop_rings();
+
+	gem_execbuf(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	igt_assert(rings_stopped() == false);
+	igt_assert(presumed_offset == exec.offset);
+
+	check_error_state(ring_name, exec.offset);
+}
+
+uint32_t handle;
+int fd;
+
+igt_main
+{
+	igt_fixture {
+		fd = drm_open_any();
+
+		handle = gem_create(fd, 4096);
+		gem_write(fd, handle, 0, batch, sizeof(batch));
+	}
+
+	igt_subtest("render")
+		test(fd, handle, I915_EXEC_RENDER, "render ring");
+
+	igt_subtest("bsd")
+		test(fd, handle, I915_EXEC_BSD, "bsd ring");
+
+	igt_subtest("blt")
+		test(fd, handle, I915_EXEC_BLT, "blitter ring");
+
+	igt_subtest("vebox")
+		test(fd, handle, I915_EXEC_VEBOX, "video enhancement ring");
+
+	igt_fixture {
+		gem_close(fd, handle);
+
+		close(fd);
+	}
+}
-- 
1.9.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 12:59 [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump oscar.mateo
@ 2014-04-11 13:09 ` Mateo Lozano, Oscar
  2014-04-11 14:27   ` Daniel Vetter
  2014-04-11 16:48 ` [PATCH v2] " oscar.mateo
  1 sibling, 1 reply; 15+ messages in thread
From: Mateo Lozano, Oscar @ 2014-04-11 13:09 UTC (permalink / raw)
  To: intel-gfx, Daniel Vetter (daniel.vetter@ffwll.ch)

Daniel, is this what you had in mind?

-- Oscar

P.S. I just re-read the Jira task and realized that I am missing the "check that the ring objects contains a reloc with MI_BB_START for your presumed batch object's address". I´ll add this and resubmit.

> -----Original Message-----
> From: Mateo Lozano, Oscar
> Sent: Friday, April 11, 2014 2:00 PM
> To: intel-gfx@lists.freedesktop.org
> Cc: Mateo Lozano, Oscar
> Subject: [PATCH] tests/gem_error_capture: Initial testcase for error state
> capture/dump
> 
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  tests/.gitignore          |   1 +
>  tests/Makefile.sources    |   1 +
>  tests/gem_error_capture.c | 230
> ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 232 insertions(+)
>  create mode 100644 tests/gem_error_capture.c
> 
> diff --git a/tests/.gitignore b/tests/.gitignore index 146bab0..945574c 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -27,6 +27,7 @@ gem_ctx_create
>  gem_ctx_exec
>  gem_double_irq_loop
>  gem_dummy_reloc_loop
> +gem_error_capture
>  gem_evict_alignment
>  gem_evict_everything
>  gem_exec_bad_domains
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources index
> bf02a48..612beb6 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -24,6 +24,7 @@ TESTS_progs_M = \
>  	gem_ctx_bad_exec \
>  	gem_ctx_exec \
>  	gem_dummy_reloc_loop \
> +	gem_error_capture \
>  	gem_evict_alignment \
>  	gem_evict_everything \
>  	gem_exec_bad_domains \
> diff --git a/tests/gem_error_capture.c b/tests/gem_error_capture.c new file
> mode 100644 index 0000000..bbf0f5d
> --- /dev/null
> +++ b/tests/gem_error_capture.c
> @@ -0,0 +1,230 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> +next
> + * paragraph) shall be included in all copies or substantial portions
> +of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT
> +SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> DAMAGES OR
> +OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> +ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER
> +DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Oscar Mateo <oscar.mateo@intel.com>
> + *
> + */
> +
> +/*
> + * Testcase: Check whether basic error state capture/dump mechanism is
> +correctly
> + * working for all rings.
> + */
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include "drm.h"
> +#include "ioctl_wrappers.h"
> +#include "drmtest.h"
> +#include "intel_io.h"
> +#include "igt_debugfs.h"
> +
> +#define MAGIC_NUMBER 0x10001
> +uint32_t batch[4] = {MI_NOOP, MI_BATCH_BUFFER_END, MAGIC_NUMBER,
> +MAGIC_NUMBER};
> +
> +static void stop_rings(void)
> +{
> +	int fd;
> +	static const char buf[] = "0xf";
> +
> +	fd = igt_debugfs_open("i915_ring_stop", O_WRONLY);
> +	igt_assert(fd >= 0);
> +
> +	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
> +
> +	close(fd);
> +}
> +
> +static bool rings_stopped(void)
> +{
> +	int fd;
> +	static char buf[128];
> +	unsigned long long val;
> +
> +	fd = igt_debugfs_open("i915_ring_stop", O_RDONLY);
> +	igt_assert(fd >= 0);
> +
> +	igt_assert(read(fd, buf, sizeof(buf)) > 0);
> +	close(fd);
> +
> +	sscanf(buf, "%llx", &val);
> +
> +	return (bool)val;
> +}
> +
> +static void clear_error_state(void)
> +{
> +	int fd;
> +	static const char buf[] = "";
> +
> +	fd = igt_debugfs_open("i915_error_state", O_WRONLY);
> +	igt_assert(fd >= 0);
> +
> +	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
> +	close(fd);
> +}
> +
> +static void check_bb_contents(char **line, size_t *line_size,
> +		FILE *file, int pos, uint32_t expected_value) {
> +	char expected_line[32];
> +
> +	igt_assert(getline(line, line_size, file) > 0);
> +
> +	snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
> +			4*pos, expected_value);
> +
> +	igt_assert(strstr(*line, expected_line)); }
> +
> +static void check_error_state(const char *expected_ring_name,
> +				uint64_t expected_offset)
> +{
> +	FILE *file;
> +	int debug_fd;
> +	char *line = NULL;
> +	size_t line_size;
> +	char *dashes = NULL;
> +	char *ring_name = NULL;
> +	uint32_t gtt_offset = 0;
> +	bool matched;
> +	int i;
> +
> +	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
> +	igt_assert(debug_fd >= 0);
> +	file = fdopen(debug_fd, "r");
> +
> +	while (getline(&line, &line_size, file) > 0) {
> +		dashes = strstr(line, "---");
> +		if (dashes) {
> +			ring_name = realloc(ring_name, dashes - line);
> +			strncpy(ring_name, line, dashes - line);
> +			ring_name[dashes - line - 1] = '\0';
> +
> +			matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
> +					&gtt_offset);
> +			if (matched) {
> +				igt_assert(strstr(ring_name,
> expected_ring_name));
> +				igt_assert(gtt_offset == expected_offset);
> +
> +				for (i = 0; i < sizeof(batch) / 4; i++)
> +					check_bb_contents(&line, &line_size,
> +							file, i, batch[i]);
> +
> +				break;
> +			}
> +		}
> +	}
> +
> +	igt_assert(matched);
> +
> +	free(line);
> +	free(ring_name);
> +
> +	close(debug_fd);
> +}
> +
> +static void test(int fd, uint32_t handle, unsigned ring_id, const char
> +*ring_name) {
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	struct drm_i915_gem_exec_object2 exec;
> +	uint64_t presumed_offset;
> +
> +	gem_require_ring(fd, ring_id);
> +
> +	clear_error_state();
> +
> +	exec.handle = handle;
> +	exec.relocation_count = 0;
> +	exec.relocs_ptr = 0;
> +	exec.alignment = 0;
> +	exec.offset = 0;
> +	exec.flags = 0;
> +	exec.rsvd1 = 0;
> +	exec.rsvd2 = 0;
> +
> +	execbuf.buffers_ptr = (uintptr_t)&exec;
> +	execbuf.buffer_count = 1;
> +	execbuf.batch_start_offset = 0;
> +	execbuf.batch_len = 16;
> +	execbuf.cliprects_ptr = 0;
> +	execbuf.num_cliprects = 0;
> +	execbuf.DR1 = 0;
> +	execbuf.DR4 = 0;
> +	execbuf.flags = ring_id;
> +	i915_execbuffer2_set_context_id(execbuf, 0);
> +	execbuf.rsvd2 = 0;
> +
> +	gem_execbuf(fd, &execbuf);
> +	gem_sync(fd, handle);
> +
> +	presumed_offset = exec.offset;
> +
> +	stop_rings();
> +
> +	gem_execbuf(fd, &execbuf);
> +	gem_sync(fd, handle);
> +
> +	igt_assert(rings_stopped() == false);
> +	igt_assert(presumed_offset == exec.offset);
> +
> +	check_error_state(ring_name, exec.offset); }
> +
> +uint32_t handle;
> +int fd;
> +
> +igt_main
> +{
> +	igt_fixture {
> +		fd = drm_open_any();
> +
> +		handle = gem_create(fd, 4096);
> +		gem_write(fd, handle, 0, batch, sizeof(batch));
> +	}
> +
> +	igt_subtest("render")
> +		test(fd, handle, I915_EXEC_RENDER, "render ring");
> +
> +	igt_subtest("bsd")
> +		test(fd, handle, I915_EXEC_BSD, "bsd ring");
> +
> +	igt_subtest("blt")
> +		test(fd, handle, I915_EXEC_BLT, "blitter ring");
> +
> +	igt_subtest("vebox")
> +		test(fd, handle, I915_EXEC_VEBOX, "video enhancement ring");
> +
> +	igt_fixture {
> +		gem_close(fd, handle);
> +
> +		close(fd);
> +	}
> +}
> --
> 1.9.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 13:09 ` Mateo Lozano, Oscar
@ 2014-04-11 14:27   ` Daniel Vetter
  0 siblings, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2014-04-11 14:27 UTC (permalink / raw)
  To: Mateo Lozano, Oscar; +Cc: Daniel Vetter (daniel.vetter@ffwll.ch), intel-gfx

On Fri, Apr 11, 2014 at 01:09:22PM +0000, Mateo Lozano, Oscar wrote:
> Daniel, is this what you had in mind?
> 
> -- Oscar
> 
> P.S. I just re-read the Jira task and realized that I am missing the
> "check that the ring objects contains a reloc with MI_BB_START for your
> presumed batch object's address". I´ll add this and resubmit.

Yeah this looks pretty cool. btw when you resend can you simply sign up
someone local to your team for the review? Bit easier that way.

I've checked your usage of igt_* infrastructure and that looks good.

Thanks, Daniel

> 
> > -----Original Message-----
> > From: Mateo Lozano, Oscar
> > Sent: Friday, April 11, 2014 2:00 PM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Mateo Lozano, Oscar
> > Subject: [PATCH] tests/gem_error_capture: Initial testcase for error state
> > capture/dump
> > 
> > From: Oscar Mateo <oscar.mateo@intel.com>
> > 
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > ---
> >  tests/.gitignore          |   1 +
> >  tests/Makefile.sources    |   1 +
> >  tests/gem_error_capture.c | 230
> > ++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 232 insertions(+)
> >  create mode 100644 tests/gem_error_capture.c
> > 
> > diff --git a/tests/.gitignore b/tests/.gitignore index 146bab0..945574c 100644
> > --- a/tests/.gitignore
> > +++ b/tests/.gitignore
> > @@ -27,6 +27,7 @@ gem_ctx_create
> >  gem_ctx_exec
> >  gem_double_irq_loop
> >  gem_dummy_reloc_loop
> > +gem_error_capture
> >  gem_evict_alignment
> >  gem_evict_everything
> >  gem_exec_bad_domains
> > diff --git a/tests/Makefile.sources b/tests/Makefile.sources index
> > bf02a48..612beb6 100644
> > --- a/tests/Makefile.sources
> > +++ b/tests/Makefile.sources
> > @@ -24,6 +24,7 @@ TESTS_progs_M = \
> >  	gem_ctx_bad_exec \
> >  	gem_ctx_exec \
> >  	gem_dummy_reloc_loop \
> > +	gem_error_capture \
> >  	gem_evict_alignment \
> >  	gem_evict_everything \
> >  	gem_exec_bad_domains \
> > diff --git a/tests/gem_error_capture.c b/tests/gem_error_capture.c new file
> > mode 100644 index 0000000..bbf0f5d
> > --- /dev/null
> > +++ b/tests/gem_error_capture.c
> > @@ -0,0 +1,230 @@
> > +/*
> > + * Copyright © 2014 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> > +obtaining a
> > + * copy of this software and associated documentation files (the
> > +"Software"),
> > + * to deal in the Software without restriction, including without
> > +limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> > +sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom
> > +the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> > +next
> > + * paragraph) shall be included in all copies or substantial portions
> > +of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > +EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > +MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > EVENT
> > +SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR
> > +OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > +ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > OTHER
> > +DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + * Authors:
> > + *    Oscar Mateo <oscar.mateo@intel.com>
> > + *
> > + */
> > +
> > +/*
> > + * Testcase: Check whether basic error state capture/dump mechanism is
> > +correctly
> > + * working for all rings.
> > + */
> > +
> > +#include <unistd.h>
> > +#include <stdlib.h>
> > +#include <stdint.h>
> > +#include <stdio.h>
> > +#include <string.h>
> > +#include <fcntl.h>
> > +#include <inttypes.h>
> > +#include <errno.h>
> > +#include <sys/stat.h>
> > +#include <sys/ioctl.h>
> > +#include "drm.h"
> > +#include "ioctl_wrappers.h"
> > +#include "drmtest.h"
> > +#include "intel_io.h"
> > +#include "igt_debugfs.h"
> > +
> > +#define MAGIC_NUMBER 0x10001
> > +uint32_t batch[4] = {MI_NOOP, MI_BATCH_BUFFER_END, MAGIC_NUMBER,
> > +MAGIC_NUMBER};
> > +
> > +static void stop_rings(void)
> > +{
> > +	int fd;
> > +	static const char buf[] = "0xf";
> > +
> > +	fd = igt_debugfs_open("i915_ring_stop", O_WRONLY);
> > +	igt_assert(fd >= 0);
> > +
> > +	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
> > +
> > +	close(fd);
> > +}
> > +
> > +static bool rings_stopped(void)
> > +{
> > +	int fd;
> > +	static char buf[128];
> > +	unsigned long long val;
> > +
> > +	fd = igt_debugfs_open("i915_ring_stop", O_RDONLY);
> > +	igt_assert(fd >= 0);
> > +
> > +	igt_assert(read(fd, buf, sizeof(buf)) > 0);
> > +	close(fd);
> > +
> > +	sscanf(buf, "%llx", &val);
> > +
> > +	return (bool)val;
> > +}
> > +
> > +static void clear_error_state(void)
> > +{
> > +	int fd;
> > +	static const char buf[] = "";
> > +
> > +	fd = igt_debugfs_open("i915_error_state", O_WRONLY);
> > +	igt_assert(fd >= 0);
> > +
> > +	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
> > +	close(fd);
> > +}
> > +
> > +static void check_bb_contents(char **line, size_t *line_size,
> > +		FILE *file, int pos, uint32_t expected_value) {
> > +	char expected_line[32];
> > +
> > +	igt_assert(getline(line, line_size, file) > 0);
> > +
> > +	snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
> > +			4*pos, expected_value);
> > +
> > +	igt_assert(strstr(*line, expected_line)); }
> > +
> > +static void check_error_state(const char *expected_ring_name,
> > +				uint64_t expected_offset)
> > +{
> > +	FILE *file;
> > +	int debug_fd;
> > +	char *line = NULL;
> > +	size_t line_size;
> > +	char *dashes = NULL;
> > +	char *ring_name = NULL;
> > +	uint32_t gtt_offset = 0;
> > +	bool matched;
> > +	int i;
> > +
> > +	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
> > +	igt_assert(debug_fd >= 0);
> > +	file = fdopen(debug_fd, "r");
> > +
> > +	while (getline(&line, &line_size, file) > 0) {
> > +		dashes = strstr(line, "---");
> > +		if (dashes) {
> > +			ring_name = realloc(ring_name, dashes - line);
> > +			strncpy(ring_name, line, dashes - line);
> > +			ring_name[dashes - line - 1] = '\0';
> > +
> > +			matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
> > +					&gtt_offset);
> > +			if (matched) {
> > +				igt_assert(strstr(ring_name,
> > expected_ring_name));
> > +				igt_assert(gtt_offset == expected_offset);
> > +
> > +				for (i = 0; i < sizeof(batch) / 4; i++)
> > +					check_bb_contents(&line, &line_size,
> > +							file, i, batch[i]);
> > +
> > +				break;
> > +			}
> > +		}
> > +	}
> > +
> > +	igt_assert(matched);
> > +
> > +	free(line);
> > +	free(ring_name);
> > +
> > +	close(debug_fd);
> > +}
> > +
> > +static void test(int fd, uint32_t handle, unsigned ring_id, const char
> > +*ring_name) {
> > +	struct drm_i915_gem_execbuffer2 execbuf;
> > +	struct drm_i915_gem_exec_object2 exec;
> > +	uint64_t presumed_offset;
> > +
> > +	gem_require_ring(fd, ring_id);
> > +
> > +	clear_error_state();
> > +
> > +	exec.handle = handle;
> > +	exec.relocation_count = 0;
> > +	exec.relocs_ptr = 0;
> > +	exec.alignment = 0;
> > +	exec.offset = 0;
> > +	exec.flags = 0;
> > +	exec.rsvd1 = 0;
> > +	exec.rsvd2 = 0;
> > +
> > +	execbuf.buffers_ptr = (uintptr_t)&exec;
> > +	execbuf.buffer_count = 1;
> > +	execbuf.batch_start_offset = 0;
> > +	execbuf.batch_len = 16;
> > +	execbuf.cliprects_ptr = 0;
> > +	execbuf.num_cliprects = 0;
> > +	execbuf.DR1 = 0;
> > +	execbuf.DR4 = 0;
> > +	execbuf.flags = ring_id;
> > +	i915_execbuffer2_set_context_id(execbuf, 0);
> > +	execbuf.rsvd2 = 0;
> > +
> > +	gem_execbuf(fd, &execbuf);
> > +	gem_sync(fd, handle);
> > +
> > +	presumed_offset = exec.offset;
> > +
> > +	stop_rings();
> > +
> > +	gem_execbuf(fd, &execbuf);
> > +	gem_sync(fd, handle);
> > +
> > +	igt_assert(rings_stopped() == false);
> > +	igt_assert(presumed_offset == exec.offset);
> > +
> > +	check_error_state(ring_name, exec.offset); }
> > +
> > +uint32_t handle;
> > +int fd;
> > +
> > +igt_main
> > +{
> > +	igt_fixture {
> > +		fd = drm_open_any();
> > +
> > +		handle = gem_create(fd, 4096);
> > +		gem_write(fd, handle, 0, batch, sizeof(batch));
> > +	}
> > +
> > +	igt_subtest("render")
> > +		test(fd, handle, I915_EXEC_RENDER, "render ring");
> > +
> > +	igt_subtest("bsd")
> > +		test(fd, handle, I915_EXEC_BSD, "bsd ring");
> > +
> > +	igt_subtest("blt")
> > +		test(fd, handle, I915_EXEC_BLT, "blitter ring");
> > +
> > +	igt_subtest("vebox")
> > +		test(fd, handle, I915_EXEC_VEBOX, "video enhancement ring");
> > +
> > +	igt_fixture {
> > +		gem_close(fd, handle);
> > +
> > +		close(fd);
> > +	}
> > +}
> > --
> > 1.9.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 12:59 [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump oscar.mateo
  2014-04-11 13:09 ` Mateo Lozano, Oscar
@ 2014-04-11 16:48 ` oscar.mateo
  2014-04-11 17:12   ` Chris Wilson
                     ` (2 more replies)
  1 sibling, 3 replies; 15+ messages in thread
From: oscar.mateo @ 2014-04-11 16:48 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

Guarantees that error capture works at a very basic level.

v2: Also check that the ring object contains a reloc with MI_BB_START
for the presumed batch object's address.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 tests/.gitignore          |   1 +
 tests/Makefile.sources    |   1 +
 tests/gem_error_capture.c | 269 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 271 insertions(+)
 create mode 100644 tests/gem_error_capture.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 146bab0..945574c 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -27,6 +27,7 @@ gem_ctx_create
 gem_ctx_exec
 gem_double_irq_loop
 gem_dummy_reloc_loop
+gem_error_capture
 gem_evict_alignment
 gem_evict_everything
 gem_exec_bad_domains
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bf02a48..612beb6 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs_M = \
 	gem_ctx_bad_exec \
 	gem_ctx_exec \
 	gem_dummy_reloc_loop \
+	gem_error_capture \
 	gem_evict_alignment \
 	gem_evict_everything \
 	gem_exec_bad_domains \
diff --git a/tests/gem_error_capture.c b/tests/gem_error_capture.c
new file mode 100644
index 0000000..88845c9
--- /dev/null
+++ b/tests/gem_error_capture.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/*
+ * Testcase: Check whether basic error state capture/dump mechanism is correctly
+ * working for all rings.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_io.h"
+#include "igt_debugfs.h"
+
+#define MAGIC_NUMBER 0x10001
+uint32_t batch[4] = {MI_NOOP, MI_BATCH_BUFFER_END, MAGIC_NUMBER, MAGIC_NUMBER};
+
+static void stop_rings(void)
+{
+	int fd;
+	static const char buf[] = "0xf";
+
+	fd = igt_debugfs_open("i915_ring_stop", O_WRONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
+
+	close(fd);
+}
+
+static bool rings_stopped(void)
+{
+	int fd;
+	static char buf[128];
+	unsigned long long val;
+
+	fd = igt_debugfs_open("i915_ring_stop", O_RDONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(read(fd, buf, sizeof(buf)) > 0);
+	close(fd);
+
+	sscanf(buf, "%llx", &val);
+
+	return (bool)val;
+}
+
+static void clear_error_state(void)
+{
+	int fd;
+	static const char buf[] = "";
+
+	fd = igt_debugfs_open("i915_error_state", O_WRONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
+	close(fd);
+}
+
+static void check_error_state(const char *expected_ring_name,
+				uint64_t expected_offset)
+{
+	FILE *file;
+	int debug_fd;
+	char *line = NULL;
+	size_t line_size = 0;
+	char *dashes = NULL;
+	char *ring_name = NULL;
+	int bb_matched = 0;
+	uint32_t gtt_offset;
+	char expected_line[32];
+	int req_matched = 0;
+	int requests;
+	uint32_t seqno, tail;
+	long jiffies;
+	int ringbuf_matched = 0;
+	unsigned int offset, command, expected_addr = 0;
+	int i, items;
+	bool bb_ok = false, req_ok = false, ringbuf_ok = false;
+
+	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
+	igt_assert(debug_fd >= 0);
+	file = fdopen(debug_fd, "r");
+
+	while (getline(&line, &line_size, file) > 0) {
+		dashes = strstr(line, "---");
+		if (!dashes)
+			continue;
+
+		ring_name = realloc(ring_name, dashes - line);
+		strncpy(ring_name, line, dashes - line);
+		ring_name[dashes - line - 1] = '\0';
+
+		bb_matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
+				&gtt_offset);
+		if (bb_matched == 1) {
+			igt_assert(strstr(ring_name, expected_ring_name));
+			igt_assert(gtt_offset == expected_offset);
+
+			for (i = 0; i < sizeof(batch) / 4; i++) {
+				igt_assert(getline(&line, &line_size, file) > 0);
+				snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
+						4*i, batch[i]);
+				igt_assert(strstr(line, expected_line));
+			}
+			bb_ok = true;
+			continue;
+		}
+
+		req_matched = sscanf(dashes, "--- %d requests\n",
+				&requests);
+		if (req_matched == 1) {
+			igt_assert(strstr(ring_name, expected_ring_name));
+			igt_assert(requests == 1);
+
+			igt_assert(getline(&line, &line_size, file) > 0);
+			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+					&seqno, &jiffies, &tail);
+			igt_assert(items == 3);
+			req_ok = true;
+			continue;
+		}
+
+		ringbuf_matched = sscanf(dashes, "--- ringbuffer = 0x%08x\n",
+				&gtt_offset);
+		if (ringbuf_matched == 1) {
+			if (!strstr(ring_name, expected_ring_name))
+				continue;
+			igt_assert(req_ok);
+
+			for (i = 0; i < tail / 4; i++) {
+				igt_assert(getline(&line, &line_size, file) > 0);
+				items = sscanf(line, "%08x :  %08x\n",
+						&offset, &command);
+				igt_assert(items == 2);
+				if ((command & 0x1F800000) == MI_BATCH_BUFFER_START) {
+					igt_assert(getline(&line, &line_size, file) > 0);
+					items = sscanf(line, "%08x :  %08x\n",
+							&offset, &expected_addr);
+					igt_assert(items == 2);
+					i++;
+				}
+			}
+			igt_assert(expected_addr == expected_offset);
+			ringbuf_ok = true;
+			continue;
+		}
+
+		if (bb_ok && req_ok && ringbuf_ok)
+			break;
+	}
+	igt_assert(bb_ok && req_ok && ringbuf_ok);
+
+	free(line);
+	free(ring_name);
+	close(debug_fd);
+}
+
+static void test(int fd, uint32_t handle, unsigned ring_id, const char *ring_name)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec;
+	uint64_t presumed_offset;
+
+	gem_require_ring(fd, ring_id);
+
+	clear_error_state();
+
+	exec.handle = handle;
+	exec.relocation_count = 0;
+	exec.relocs_ptr = 0;
+	exec.alignment = 0;
+	exec.offset = 0;
+	exec.flags = 0;
+	exec.rsvd1 = 0;
+	exec.rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)&exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 16;
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = ring_id;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+	execbuf.rsvd2 = 0;
+
+	gem_execbuf(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	presumed_offset = exec.offset;
+
+	stop_rings();
+
+	gem_execbuf(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	igt_assert(rings_stopped() == false);
+	igt_assert(presumed_offset == exec.offset);
+
+	check_error_state(ring_name, exec.offset);
+}
+
+uint32_t handle;
+int fd;
+
+igt_main
+{
+	igt_fixture {
+		fd = drm_open_any();
+
+		handle = gem_create(fd, 4096);
+		gem_write(fd, handle, 0, batch, sizeof(batch));
+	}
+
+	igt_subtest("render")
+		test(fd, handle, I915_EXEC_RENDER, "render ring");
+
+	igt_subtest("bsd")
+		test(fd, handle, I915_EXEC_BSD, "bsd ring");
+
+	igt_subtest("blt")
+		test(fd, handle, I915_EXEC_BLT, "blitter ring");
+
+	igt_subtest("vebox")
+		test(fd, handle, I915_EXEC_VEBOX, "video enhancement ring");
+
+	igt_fixture {
+		gem_close(fd, handle);
+
+		close(fd);
+	}
+}
-- 
1.9.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 16:48 ` [PATCH v2] " oscar.mateo
@ 2014-04-11 17:12   ` Chris Wilson
  2014-04-14 10:01     ` Mateo Lozano, Oscar
  2014-04-15 14:49   ` Mika Kuoppala
  2014-05-09 11:54   ` [PATCH] " oscar.mateo
  2 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2014-04-11 17:12 UTC (permalink / raw)
  To: oscar.mateo; +Cc: intel-gfx

On Fri, Apr 11, 2014 at 05:48:12PM +0100, oscar.mateo@intel.com wrote:
> +static void check_error_state(const char *expected_ring_name,
> +				uint64_t expected_offset)
> +{
> +	FILE *file;
> +	int debug_fd;
> +	char *line = NULL;
> +	size_t line_size = 0;
> +	char *dashes = NULL;
> +	char *ring_name = NULL;
> +	int bb_matched = 0;
> +	uint32_t gtt_offset;
> +	char expected_line[32];
> +	int req_matched = 0;
> +	int requests;
> +	uint32_t seqno, tail;
> +	long jiffies;
> +	int ringbuf_matched = 0;
> +	unsigned int offset, command, expected_addr = 0;
> +	int i, items;
> +	bool bb_ok = false, req_ok = false, ringbuf_ok = false;

Most of these are only of use in local scope.

> +	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
> +	igt_assert(debug_fd >= 0);
> +	file = fdopen(debug_fd, "r");
> +
> +	while (getline(&line, &line_size, file) > 0) {
> +		dashes = strstr(line, "---");
> +		if (!dashes)
> +			continue;
> +
> +		ring_name = realloc(ring_name, dashes - line);
> +		strncpy(ring_name, line, dashes - line);
> +		ring_name[dashes - line - 1] = '\0';
> +
> +		bb_matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
> +				&gtt_offset);
> +		if (bb_matched == 1) {
> +			igt_assert(strstr(ring_name, expected_ring_name));
> +			igt_assert(gtt_offset == expected_offset);
> +
> +			for (i = 0; i < sizeof(batch) / 4; i++) {
> +				igt_assert(getline(&line, &line_size, file) > 0);
> +				snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
> +						4*i, batch[i]);
> +				igt_assert(strstr(line, expected_line));
> +			}
> +			bb_ok = true;
> +			continue;
> +		}
> +
> +		req_matched = sscanf(dashes, "--- %d requests\n",
> +				&requests);
> +		if (req_matched == 1) {
> +			igt_assert(strstr(ring_name, expected_ring_name));
> +			igt_assert(requests == 1);

Bad assumption. You could still have the request from
gem_quiescent_gpu() which may not have been retired before the error
triggers.

> +
> +			igt_assert(getline(&line, &line_size, file) > 0);
> +			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
> +					&seqno, &jiffies, &tail);
> +			igt_assert(items == 3);

Bad. I may change the format. s/may/will/

> +			req_ok = true;
> +			continue;
> +		}
> +
> +		ringbuf_matched = sscanf(dashes, "--- ringbuffer = 0x%08x\n",
> +				&gtt_offset);
> +		if (ringbuf_matched == 1) {
> +			if (!strstr(ring_name, expected_ring_name))
> +				continue;
> +			igt_assert(req_ok);
> +
> +			for (i = 0; i < tail / 4; i++) {
> +				igt_assert(getline(&line, &line_size, file) > 0);
> +				items = sscanf(line, "%08x :  %08x\n",
> +						&offset, &command);
> +				igt_assert(items == 2);
> +				if ((command & 0x1F800000) == MI_BATCH_BUFFER_START) {
> +					igt_assert(getline(&line, &line_size, file) > 0);
> +					items = sscanf(line, "%08x :  %08x\n",
> +							&offset, &expected_addr);
> +					igt_assert(items == 2);
> +					i++;
> +				}
> +			}
> +			igt_assert(expected_addr == expected_offset);

Bad. Some gen encode flags into the BB start address.

> +			ringbuf_ok = true;
> +			continue;
> +		}
> +
> +		if (bb_ok && req_ok && ringbuf_ok)
> +			break;
> +	}
> +	igt_assert(bb_ok && req_ok && ringbuf_ok);
> +
> +	free(line);
> +	free(ring_name);
> +	close(debug_fd);
> +}

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 17:12   ` Chris Wilson
@ 2014-04-14 10:01     ` Mateo Lozano, Oscar
  2014-04-14 10:14       ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Mateo Lozano, Oscar @ 2014-04-14 10:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

> > +	FILE *file;
> > +	int debug_fd;
> > +	char *line = NULL;
> > +	size_t line_size = 0;
> > +	char *dashes = NULL;
> > +	char *ring_name = NULL;
> > +	int bb_matched = 0;
> > +	uint32_t gtt_offset;
> > +	char expected_line[32];
> > +	int req_matched = 0;
> > +	int requests;
> > +	uint32_t seqno, tail;
> > +	long jiffies;
> > +	int ringbuf_matched = 0;
> > +	unsigned int offset, command, expected_addr = 0;
> > +	int i, items;
> > +	bool bb_ok = false, req_ok = false, ringbuf_ok = false;
> 
> Most of these are only of use in local scope.

ACK, will change.

> > +	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
> > +	igt_assert(debug_fd >= 0);
> > +	file = fdopen(debug_fd, "r");
> > +
> > +	while (getline(&line, &line_size, file) > 0) {
> > +		dashes = strstr(line, "---");
> > +		if (!dashes)
> > +			continue;
> > +
> > +		ring_name = realloc(ring_name, dashes - line);
> > +		strncpy(ring_name, line, dashes - line);
> > +		ring_name[dashes - line - 1] = '\0';
> > +
> > +		bb_matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
> > +				&gtt_offset);
> > +		if (bb_matched == 1) {
> > +			igt_assert(strstr(ring_name, expected_ring_name));
> > +			igt_assert(gtt_offset == expected_offset);
> > +
> > +			for (i = 0; i < sizeof(batch) / 4; i++) {
> > +				igt_assert(getline(&line, &line_size, file) > 0);
> > +				snprintf(expected_line, sizeof(expected_line),
> "%08x :  %08x",
> > +						4*i, batch[i]);
> > +				igt_assert(strstr(line, expected_line));
> > +			}
> > +			bb_ok = true;
> > +			continue;
> > +		}
> > +
> > +		req_matched = sscanf(dashes, "--- %d requests\n",
> > +				&requests);
> > +		if (req_matched == 1) {
> > +			igt_assert(strstr(ring_name, expected_ring_name));
> > +			igt_assert(requests == 1);
> 
> Bad assumption. You could still have the request from
> gem_quiescent_gpu() which may not have been retired before the error
> triggers.

Hmmmm... but I make a first valid gem_execbuf()+gem_sync() before the one that actually hangs. Shouldn´t that make sure that all previous requests have been retired?

> > +
> > +			igt_assert(getline(&line, &line_size, file) > 0);
> > +			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail
> 0x%08x\n",
> > +					&seqno, &jiffies, &tail);
> > +			igt_assert(items == 3);
> 
> Bad. I may change the format. s/may/will/

I still need to make sure I got a valid tail, so I need to know the format and fail if don´t understand it. Or do you want me to use a different tail, maybe the ringbuffer one?

> > +			req_ok = true;
> > +			continue;
> > +		}
> > +
> > +		ringbuf_matched = sscanf(dashes, "--- ringbuffer = 0x%08x\n",
> > +				&gtt_offset);
> > +		if (ringbuf_matched == 1) {
> > +			if (!strstr(ring_name, expected_ring_name))
> > +				continue;
> > +			igt_assert(req_ok);
> > +
> > +			for (i = 0; i < tail / 4; i++) {
> > +				igt_assert(getline(&line, &line_size, file) > 0);
> > +				items = sscanf(line, "%08x :  %08x\n",
> > +						&offset, &command);
> > +				igt_assert(items == 2);
> > +				if ((command & 0x1F800000) ==
> MI_BATCH_BUFFER_START) {
> > +					igt_assert(getline(&line, &line_size,
> file) > 0);
> > +					items = sscanf(line, "%08x :  %08x\n",
> > +							&offset,
> &expected_addr);
> > +					igt_assert(items == 2);
> > +					i++;
> > +				}
> > +			}
> > +			igt_assert(expected_addr == expected_offset);
> 
> Bad. Some gen encode flags into the BB start address.

ACK, will change.

> > +			ringbuf_ok = true;
> > +			continue;
> > +		}
> > +
> > +		if (bb_ok && req_ok && ringbuf_ok)
> > +			break;
> > +	}
> > +	igt_assert(bb_ok && req_ok && ringbuf_ok);
> > +
> > +	free(line);
> > +	free(ring_name);
> > +	close(debug_fd);
> > +}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-14 10:01     ` Mateo Lozano, Oscar
@ 2014-04-14 10:14       ` Chris Wilson
  2014-04-14 10:23         ` Mateo Lozano, Oscar
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2014-04-14 10:14 UTC (permalink / raw)
  To: Mateo Lozano, Oscar; +Cc: intel-gfx

On Mon, Apr 14, 2014 at 10:01:12AM +0000, Mateo Lozano, Oscar wrote:
> > > +				&requests);
> > > +		if (req_matched == 1) {
> > > +			igt_assert(strstr(ring_name, expected_ring_name));
> > > +			igt_assert(requests == 1);
> > 
> > Bad assumption. You could still have the request from
> > gem_quiescent_gpu() which may not have been retired before the error
> > triggers.
> 
> Hmmmm... but I make a first valid gem_execbuf()+gem_sync() before the one that actually hangs. Shouldn´t that make sure that all previous requests have been retired?

No, I would not make that assumption about kernel behaviour. The only
thing that it will guarantee is that the last request with that handle
is retired. (In other words, forthcoming bug fixes already break this
assumption.)
 
> > > +
> > > +			igt_assert(getline(&line, &line_size, file) > 0);
> > > +			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail
> > 0x%08x\n",
> > > +					&seqno, &jiffies, &tail);
> > > +			igt_assert(items == 3);
> > 
> > Bad. I may change the format. s/may/will/
> 
> I still need to make sure I got a valid tail, so I need to know the format and fail if don´t understand it. Or do you want me to use a different tail, maybe the ringbuffer one?

I didn't spot where you used tail. Care to elaborate? I may be able to
suggest an alternative, or we may either code this more defensively or
make the kernel emission easier to use and hopefully more informative
for debugging.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-14 10:14       ` Chris Wilson
@ 2014-04-14 10:23         ` Mateo Lozano, Oscar
  2014-04-14 10:30           ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Mateo Lozano, Oscar @ 2014-04-14 10:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

> > > > +				&requests);
> > > > +		if (req_matched == 1) {
> > > > +			igt_assert(strstr(ring_name, expected_ring_name));
> > > > +			igt_assert(requests == 1);
> > >
> > > Bad assumption. You could still have the request from
> > > gem_quiescent_gpu() which may not have been retired before the error
> > > triggers.
> >
> > Hmmmm... but I make a first valid gem_execbuf()+gem_sync() before the
> one that actually hangs. Shouldn´t that make sure that all previous requests
> have been retired?
> 
> No, I would not make that assumption about kernel behaviour. The only thing
> that it will guarantee is that the last request with that handle is retired. (In
> other words, forthcoming bug fixes already break this
> assumption.)

Ok, then I´ll check all the reported requests (can I at least assume that the one I am interested in will be the last one? request_list seems to be FIFO...).
 
> > > > +
> > > > +			igt_assert(getline(&line, &line_size, file) > 0);
> > > > +			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail
> > > 0x%08x\n",
> > > > +					&seqno, &jiffies, &tail);
> > > > +			igt_assert(items == 3);
> > >
> > > Bad. I may change the format. s/may/will/
> >
> > I still need to make sure I got a valid tail, so I need to know the format and
> fail if don´t understand it. Or do you want me to use a different tail, maybe the
> ringbuffer one?
> 
> I didn't spot where you used tail. Care to elaborate? I may be able to suggest an
> alternative, or we may either code this more defensively or make the kernel
> emission easier to use and hopefully more informative for debugging.

I´m using the request tail to traverse the ring buffer, make sure there is a MI_BB_START and then our object´s address shortly before that tail. I could use ring->tail, or I could skip this check altogether (after all, I´m already checking that the GPU was executing a Batch Buffer, and that the BB had my magic number on it).

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-14 10:23         ` Mateo Lozano, Oscar
@ 2014-04-14 10:30           ` Chris Wilson
  2014-04-14 13:03             ` Mateo Lozano, Oscar
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2014-04-14 10:30 UTC (permalink / raw)
  To: Mateo Lozano, Oscar; +Cc: intel-gfx

On Mon, Apr 14, 2014 at 10:23:20AM +0000, Mateo Lozano, Oscar wrote:
> > > > > +				&requests);
> > > > > +		if (req_matched == 1) {
> > > > > +			igt_assert(strstr(ring_name, expected_ring_name));
> > > > > +			igt_assert(requests == 1);
> > > >
> > > > Bad assumption. You could still have the request from
> > > > gem_quiescent_gpu() which may not have been retired before the error
> > > > triggers.
> > >
> > > Hmmmm... but I make a first valid gem_execbuf()+gem_sync() before the
> > one that actually hangs. Shouldn´t that make sure that all previous requests
> > have been retired?
> > 
> > No, I would not make that assumption about kernel behaviour. The only thing
> > that it will guarantee is that the last request with that handle is retired. (In
> > other words, forthcoming bug fixes already break this
> > assumption.)
> 
> Ok, then I´ll check all the reported requests (can I at least assume that the one I am interested in will be the last one? request_list seems to be FIFO...).

In general, even that may be dangerous. (But less so if can be sure that
is no batchbuffer injection from the testsuite, and that there are no
other clients, or that the kernel doesn't start having to do its own
w/a.)

> > > > > +
> > > > > +			igt_assert(getline(&line, &line_size, file) > 0);
> > > > > +			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail
> > > > 0x%08x\n",
> > > > > +					&seqno, &jiffies, &tail);
> > > > > +			igt_assert(items == 3);
> > > >
> > > > Bad. I may change the format. s/may/will/
> > >
> > > I still need to make sure I got a valid tail, so I need to know the format and
> > fail if don´t understand it. Or do you want me to use a different tail, maybe the
> > ringbuffer one?
> > 
> > I didn't spot where you used tail. Care to elaborate? I may be able to suggest an
> > alternative, or we may either code this more defensively or make the kernel
> > emission easier to use and hopefully more informative for debugging.
> 
> I´m using the request tail to traverse the ring buffer, make sure there is a MI_BB_START and then our object´s address shortly before that tail. I could use ring->tail, or I could skip this check altogether (after all, I´m already checking that the GPU was executing a Batch Buffer, and that the BB had my magic number on it).

I would add a little more smarts to both the kernel and error-decode.
In the kernel, we can print the guilty request, which you can then use
to confirm that it is yours. That seems to me to be a stronger
validation of gem_error_capture, and a useful bit of information from
hangstats that we do not expose currently.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-14 10:30           ` Chris Wilson
@ 2014-04-14 13:03             ` Mateo Lozano, Oscar
  2014-04-15 19:38               ` Daniel Vetter
  0 siblings, 1 reply; 15+ messages in thread
From: Mateo Lozano, Oscar @ 2014-04-14 13:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

> I would add a little more smarts to both the kernel and error-decode.
> In the kernel, we can print the guilty request, which you can then use to
> confirm that it is yours. That seems to me to be a stronger validation of
> gem_error_capture, and a useful bit of information from hangstats that we do
> not expose currently.

That sounds good. I have to add a number of other things to i915_gpu_error as part of the Execlists code, so I´ll add a "--- guilty request" as well and resubmit this test together with the series.

Thanks,
Oscar

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 16:48 ` [PATCH v2] " oscar.mateo
  2014-04-11 17:12   ` Chris Wilson
@ 2014-04-15 14:49   ` Mika Kuoppala
  2014-05-09 12:04     ` Mateo Lozano, Oscar
  2014-05-09 11:54   ` [PATCH] " oscar.mateo
  2 siblings, 1 reply; 15+ messages in thread
From: Mika Kuoppala @ 2014-04-15 14:49 UTC (permalink / raw)
  To: oscar.mateo, intel-gfx


Hi,

oscar.mateo@intel.com writes:

> From: Oscar Mateo <oscar.mateo@intel.com>
>
> Guarantees that error capture works at a very basic level.
>
> v2: Also check that the ring object contains a reloc with MI_BB_START
> for the presumed batch object's address.
>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  tests/.gitignore          |   1 +
>  tests/Makefile.sources    |   1 +
>  tests/gem_error_capture.c | 269 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 271 insertions(+)
>  create mode 100644 tests/gem_error_capture.c
>
> diff --git a/tests/.gitignore b/tests/.gitignore
> index 146bab0..945574c 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -27,6 +27,7 @@ gem_ctx_create
>  gem_ctx_exec
>  gem_double_irq_loop
>  gem_dummy_reloc_loop
> +gem_error_capture
>  gem_evict_alignment
>  gem_evict_everything
>  gem_exec_bad_domains
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index bf02a48..612beb6 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -24,6 +24,7 @@ TESTS_progs_M = \
>  	gem_ctx_bad_exec \
>  	gem_ctx_exec \
>  	gem_dummy_reloc_loop \
> +	gem_error_capture \
>  	gem_evict_alignment \
>  	gem_evict_everything \
>  	gem_exec_bad_domains \
> diff --git a/tests/gem_error_capture.c b/tests/gem_error_capture.c
> new file mode 100644
> index 0000000..88845c9
> --- /dev/null
> +++ b/tests/gem_error_capture.c
> @@ -0,0 +1,269 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Oscar Mateo <oscar.mateo@intel.com>
> + *
> + */
> +
> +/*
> + * Testcase: Check whether basic error state capture/dump mechanism is correctly
> + * working for all rings.
> + */
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include "drm.h"
> +#include "ioctl_wrappers.h"
> +#include "drmtest.h"
> +#include "intel_io.h"
> +#include "igt_debugfs.h"
> +
> +#define MAGIC_NUMBER 0x10001
> +uint32_t batch[4] = {MI_NOOP, MI_BATCH_BUFFER_END, MAGIC_NUMBER, MAGIC_NUMBER};
> +
> +static void stop_rings(void)
> +{
> +	int fd;
> +	static const char buf[] = "0xf";
> +
> +	fd = igt_debugfs_open("i915_ring_stop", O_WRONLY);
> +	igt_assert(fd >= 0);
> +
> +	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
> +
> +	close(fd);
> +}
> +
> +static bool rings_stopped(void)
> +{
> +	int fd;
> +	static char buf[128];
> +	unsigned long long val;
> +
> +	fd = igt_debugfs_open("i915_ring_stop", O_RDONLY);
> +	igt_assert(fd >= 0);
> +
> +	igt_assert(read(fd, buf, sizeof(buf)) > 0);
> +	close(fd);
> +
> +	sscanf(buf, "%llx", &val);
> +
> +	return (bool)val;
> +}

Please use igt_set_stop_rings() and igt_get_stop_rings().

Also consider stopping only the ring you are testing for.

> +static void clear_error_state(void)
> +{
> +	int fd;
> +	static const char buf[] = "";
> +
> +	fd = igt_debugfs_open("i915_error_state", O_WRONLY);
> +	igt_assert(fd >= 0);
> +
> +	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
> +	close(fd);
> +}
> +
> +static void check_error_state(const char *expected_ring_name,
> +				uint64_t expected_offset)
> +{
> +	FILE *file;
> +	int debug_fd;
> +	char *line = NULL;
> +	size_t line_size = 0;
> +	char *dashes = NULL;
> +	char *ring_name = NULL;
> +	int bb_matched = 0;
> +	uint32_t gtt_offset;
> +	char expected_line[32];
> +	int req_matched = 0;
> +	int requests;
> +	uint32_t seqno, tail;
> +	long jiffies;
> +	int ringbuf_matched = 0;
> +	unsigned int offset, command, expected_addr = 0;
> +	int i, items;
> +	bool bb_ok = false, req_ok = false, ringbuf_ok = false;
> +
> +	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
> +	igt_assert(debug_fd >= 0);
> +	file = fdopen(debug_fd, "r");
> +
> +	while (getline(&line, &line_size, file) > 0) {
> +		dashes = strstr(line, "---");
> +		if (!dashes)
> +			continue;
> +
> +		ring_name = realloc(ring_name, dashes - line);
> +		strncpy(ring_name, line, dashes - line);
> +		ring_name[dashes - line - 1] = '\0';
> +
> +		bb_matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
> +				&gtt_offset);
> +		if (bb_matched == 1) {
> +			igt_assert(strstr(ring_name, expected_ring_name));
> +			igt_assert(gtt_offset == expected_offset);
> +
> +			for (i = 0; i < sizeof(batch) / 4; i++) {
> +				igt_assert(getline(&line, &line_size, file) > 0);
> +				snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
> +						4*i, batch[i]);
> +				igt_assert(strstr(line, expected_line));
> +			}
> +			bb_ok = true;
> +			continue;
> +		}
> +
> +		req_matched = sscanf(dashes, "--- %d requests\n",
> +				&requests);
> +		if (req_matched == 1) {
> +			igt_assert(strstr(ring_name, expected_ring_name));
> +			igt_assert(requests == 1);
> +
> +			igt_assert(getline(&line, &line_size, file) > 0);
> +			items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
> +					&seqno, &jiffies, &tail);
> +			igt_assert(items == 3);
> +			req_ok = true;
> +			continue;
> +		}
> +
> +		ringbuf_matched = sscanf(dashes, "--- ringbuffer = 0x%08x\n",
> +				&gtt_offset);
> +		if (ringbuf_matched == 1) {
> +			if (!strstr(ring_name, expected_ring_name))
> +				continue;
> +			igt_assert(req_ok);
> +
> +			for (i = 0; i < tail / 4; i++) {
> +				igt_assert(getline(&line, &line_size, file) > 0);
> +				items = sscanf(line, "%08x :  %08x\n",
> +						&offset, &command);
> +				igt_assert(items == 2);
> +				if ((command & 0x1F800000) == MI_BATCH_BUFFER_START) {
> +					igt_assert(getline(&line, &line_size, file) > 0);
> +					items = sscanf(line, "%08x :  %08x\n",
> +							&offset, &expected_addr);
> +					igt_assert(items == 2);

Should check for MAGIC_NUMBER here?

-Mika

> +					i++;
> +				}
> +			}
> +			igt_assert(expected_addr == expected_offset);
> +			ringbuf_ok = true;
> +			continue;
> +		}
> +
> +		if (bb_ok && req_ok && ringbuf_ok)
> +			break;
> +	}
> +	igt_assert(bb_ok && req_ok && ringbuf_ok);
> +
> +	free(line);
> +	free(ring_name);
> +	close(debug_fd);
> +}
> +
> +static void test(int fd, uint32_t handle, unsigned ring_id, const char *ring_name)
> +{
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	struct drm_i915_gem_exec_object2 exec;
> +	uint64_t presumed_offset;
> +
> +	gem_require_ring(fd, ring_id);
> +
> +	clear_error_state();
> +
> +	exec.handle = handle;
> +	exec.relocation_count = 0;
> +	exec.relocs_ptr = 0;
> +	exec.alignment = 0;
> +	exec.offset = 0;
> +	exec.flags = 0;
> +	exec.rsvd1 = 0;
> +	exec.rsvd2 = 0;
> +
> +	execbuf.buffers_ptr = (uintptr_t)&exec;
> +	execbuf.buffer_count = 1;
> +	execbuf.batch_start_offset = 0;
> +	execbuf.batch_len = 16;
> +	execbuf.cliprects_ptr = 0;
> +	execbuf.num_cliprects = 0;
> +	execbuf.DR1 = 0;
> +	execbuf.DR4 = 0;
> +	execbuf.flags = ring_id;
> +	i915_execbuffer2_set_context_id(execbuf, 0);
> +	execbuf.rsvd2 = 0;
> +
> +	gem_execbuf(fd, &execbuf);
> +	gem_sync(fd, handle);
> +
> +	presumed_offset = exec.offset;
> +
> +	stop_rings();
> +
> +	gem_execbuf(fd, &execbuf);
> +	gem_sync(fd, handle);
> +
> +	igt_assert(rings_stopped() == false);
> +	igt_assert(presumed_offset == exec.offset);
> +
> +	check_error_state(ring_name, exec.offset);
> +}
> +
> +uint32_t handle;
> +int fd;
> +
> +igt_main
> +{
> +	igt_fixture {
> +		fd = drm_open_any();
> +
> +		handle = gem_create(fd, 4096);
> +		gem_write(fd, handle, 0, batch, sizeof(batch));
> +	}
> +
> +	igt_subtest("render")
> +		test(fd, handle, I915_EXEC_RENDER, "render ring");
> +
> +	igt_subtest("bsd")
> +		test(fd, handle, I915_EXEC_BSD, "bsd ring");
> +
> +	igt_subtest("blt")
> +		test(fd, handle, I915_EXEC_BLT, "blitter ring");
> +
> +	igt_subtest("vebox")
> +		test(fd, handle, I915_EXEC_VEBOX, "video enhancement ring");
> +
> +	igt_fixture {
> +		gem_close(fd, handle);
> +
> +		close(fd);
> +	}
> +}
> -- 
> 1.9.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-14 13:03             ` Mateo Lozano, Oscar
@ 2014-04-15 19:38               ` Daniel Vetter
  2014-05-09 12:07                 ` Mateo Lozano, Oscar
  0 siblings, 1 reply; 15+ messages in thread
From: Daniel Vetter @ 2014-04-15 19:38 UTC (permalink / raw)
  To: Mateo Lozano, Oscar; +Cc: intel-gfx

On Mon, Apr 14, 2014 at 01:03:58PM +0000, Mateo Lozano, Oscar wrote:
> > I would add a little more smarts to both the kernel and error-decode.
> > In the kernel, we can print the guilty request, which you can then use to
> > confirm that it is yours. That seems to me to be a stronger validation of
> > gem_error_capture, and a useful bit of information from hangstats that we do
> > not expose currently.
> 
> That sounds good. I have to add a number of other things to
> i915_gpu_error as part of the Execlists code, so I´ll add a "--- guilty
> request" as well and resubmit this test together with the series.

If we want this much smarts then we need a properly hanging batch, e.g.
like the looping batch used in gem_reset_stats.

The problem with that is that this will kill the gpu if reset doesn't work
(i.e. gen2/3) so we need to skip this test there. Or maybe split things
into 2 subtests and use the properly hanging batch only when we do the
extended guilty testing under discussion here.

But in any case just checking that the batch is somewhere in the ring
(properly masking of lower bits 0-11 ofc) and checking whether the batch
is correctl dumped (with the magic value) would catch a lot of the
past&present execbuf bugs - we've had issues with dumping fancy values of
0 a lot.

For the guilty stuff we have an extensive set of tests in gem_reset_stat
using the reset stat ioctl already. And for the occasional "the hang
detection logic is busted bug" I think nothing short of a human brain
locking at the batch really helps. At least if we want to be somewhat
platform agnostic ...

So imo the current level of checking loosk Good Enough. But I'm certainly
not going to stop you ;-)

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-11 16:48 ` [PATCH v2] " oscar.mateo
  2014-04-11 17:12   ` Chris Wilson
  2014-04-15 14:49   ` Mika Kuoppala
@ 2014-05-09 11:54   ` oscar.mateo
  2 siblings, 0 replies; 15+ messages in thread
From: oscar.mateo @ 2014-05-09 11:54 UTC (permalink / raw)
  To: intel-gfx

From: Oscar Mateo <oscar.mateo@intel.com>

Guarantees that error capture works at a very basic level.

v2: Also check that the ring object contains a reloc with MI_BB_START
for the presumed batch object's address.

v3: Chris review comments:
- Move variables to local scope.
- Do not assume there is only one request.
- Some gen encode flags into the BB start address.
Also, use igt_set/get_stop_rings as suggested by Mika Kuoppala.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 tests/.gitignore          |   1 +
 tests/Makefile.sources    |   1 +
 tests/gem_error_capture.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 256 insertions(+)
 create mode 100644 tests/gem_error_capture.c

diff --git a/tests/.gitignore b/tests/.gitignore
index d360b20..3b60c90 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -27,6 +27,7 @@ gem_ctx_create
 gem_ctx_exec
 gem_double_irq_loop
 gem_dummy_reloc_loop
+gem_error_capture
 gem_evict_alignment
 gem_evict_everything
 gem_exec_bad_domains
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index fae7f97..b63696f 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs_M = \
 	gem_ctx_bad_exec \
 	gem_ctx_exec \
 	gem_dummy_reloc_loop \
+	gem_error_capture \
 	gem_evict_alignment \
 	gem_evict_everything \
 	gem_exec_bad_domains \
diff --git a/tests/gem_error_capture.c b/tests/gem_error_capture.c
new file mode 100644
index 0000000..d725d6c
--- /dev/null
+++ b/tests/gem_error_capture.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/*
+ * Testcase: Check whether basic error state capture/dump mechanism is correctly
+ * working for all rings.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_io.h"
+#include "igt_debugfs.h"
+#include "intel_chipset.h"
+
+#define MAGIC_NUMBER 0x10001
+uint32_t batch[4] = {MI_NOOP, MI_BATCH_BUFFER_END, MAGIC_NUMBER, MAGIC_NUMBER};
+static uint32_t devid;
+
+static void clear_error_state(void)
+{
+	int fd;
+	static const char buf[] = "";
+
+	fd = igt_debugfs_open("i915_error_state", O_WRONLY);
+	igt_assert(fd >= 0);
+
+	igt_assert(write(fd, buf, sizeof(buf)) == sizeof(buf));
+	close(fd);
+}
+
+static void check_error_state(const char *expected_ring_name,
+				uint64_t expected_offset)
+{
+	FILE *file;
+	int debug_fd;
+	char *line = NULL;
+	size_t line_size = 0;
+	char *ring_name = NULL;
+	bool bb_ok = false, req_ok = false, ringbuf_ok = false;
+
+	debug_fd = igt_debugfs_open("i915_error_state", O_RDONLY);
+	igt_assert(debug_fd >= 0);
+	file = fdopen(debug_fd, "r");
+
+	while (getline(&line, &line_size, file) > 0) {
+		char *dashes = NULL;
+		int bb_matched = 0;
+		uint32_t gtt_offset;
+		int req_matched = 0;
+		int requests;
+		uint32_t tail;
+		int ringbuf_matched = 0;
+		int i, items;
+
+		dashes = strstr(line, "---");
+		if (!dashes)
+			continue;
+
+		ring_name = realloc(ring_name, dashes - line);
+		strncpy(ring_name, line, dashes - line);
+		ring_name[dashes - line - 1] = '\0';
+
+		bb_matched = sscanf(dashes, "--- gtt_offset = 0x%08x\n",
+				&gtt_offset);
+		if (bb_matched == 1) {
+			char expected_line[32];
+
+			igt_assert(strstr(ring_name, expected_ring_name));
+			igt_assert(gtt_offset == expected_offset);
+
+			for (i = 0; i < sizeof(batch) / 4; i++) {
+				igt_assert(getline(&line, &line_size, file) > 0);
+				snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
+						4*i, batch[i]);
+				igt_assert(strstr(line, expected_line));
+			}
+			bb_ok = true;
+			continue;
+		}
+
+		req_matched = sscanf(dashes, "--- %d requests\n", &requests);
+		if (req_matched == 1) {
+			igt_assert(strstr(ring_name, expected_ring_name));
+			igt_assert(requests > 0);
+
+			for (i = 0; i < requests; i++) {
+				uint32_t seqno;
+				long jiffies;
+
+				igt_assert(getline(&line, &line_size, file) > 0);
+				items = sscanf(line, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+						&seqno, &jiffies, &tail);
+				igt_assert(items == 3);
+			}
+			req_ok = true;
+			continue;
+		}
+
+		ringbuf_matched = sscanf(dashes, "--- ringbuffer = 0x%08x\n",
+				&gtt_offset);
+		if (ringbuf_matched == 1) {
+			unsigned int offset, command, expected_addr = 0;
+
+			if (!strstr(ring_name, expected_ring_name))
+				continue;
+			igt_assert(req_ok);
+
+			for (i = 0; i < tail / 4; i++) {
+				igt_assert(getline(&line, &line_size, file) > 0);
+				items = sscanf(line, "%08x :  %08x\n",
+						&offset, &command);
+				igt_assert(items == 2);
+				if ((command & 0x1F800000) == MI_BATCH_BUFFER_START) {
+					igt_assert(getline(&line, &line_size, file) > 0);
+					items = sscanf(line, "%08x :  %08x\n",
+							&offset, &expected_addr);
+					igt_assert(items == 2);
+					i++;
+				}
+			}
+			if (intel_gen(devid) >= 4)
+				igt_assert(expected_addr == expected_offset);
+			else
+				igt_assert((expected_addr & ~0x1) == expected_offset);
+			ringbuf_ok = true;
+			continue;
+		}
+
+		if (bb_ok && req_ok && ringbuf_ok)
+			break;
+	}
+	igt_assert(bb_ok && req_ok && ringbuf_ok);
+
+	free(line);
+	free(ring_name);
+	close(debug_fd);
+}
+
+static void test(int fd, uint32_t handle, unsigned ring_id, const char *ring_name)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec;
+	uint64_t presumed_offset;
+	enum stop_ring_flags flags;
+
+	gem_require_ring(fd, ring_id);
+
+	clear_error_state();
+
+	exec.handle = handle;
+	exec.relocation_count = 0;
+	exec.relocs_ptr = 0;
+	exec.alignment = 0;
+	exec.offset = 0;
+	exec.flags = 0;
+	exec.rsvd1 = 0;
+	exec.rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)&exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 16;
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = ring_id;
+	i915_execbuffer2_set_context_id(execbuf, 0);
+	execbuf.rsvd2 = 0;
+
+	gem_execbuf(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	presumed_offset = exec.offset;
+
+	flags = igt_to_stop_ring_flag(ring_id);
+	igt_set_stop_rings(flags);
+
+	gem_execbuf(fd, &execbuf);
+	gem_sync(fd, handle);
+
+	igt_assert(igt_get_stop_rings() == STOP_RING_NONE);
+	igt_assert(presumed_offset == exec.offset);
+
+	check_error_state(ring_name, exec.offset);
+}
+
+uint32_t handle;
+int fd;
+
+igt_main
+{
+	igt_fixture {
+		fd = drm_open_any();
+
+		handle = gem_create(fd, 4096);
+		gem_write(fd, handle, 0, batch, sizeof(batch));
+
+		devid = intel_get_drm_devid(fd);
+	}
+
+	igt_subtest("render")
+		test(fd, handle, I915_EXEC_RENDER, "render ring");
+
+	igt_subtest("bsd")
+		test(fd, handle, I915_EXEC_BSD, "bsd ring");
+
+	igt_subtest("blt")
+		test(fd, handle, I915_EXEC_BLT, "blitter ring");
+
+	igt_subtest("vebox")
+		test(fd, handle, I915_EXEC_VEBOX, "video enhancement ring");
+
+	igt_fixture {
+		gem_close(fd, handle);
+
+		close(fd);
+	}
+}
-- 
1.9.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-15 14:49   ` Mika Kuoppala
@ 2014-05-09 12:04     ` Mateo Lozano, Oscar
  0 siblings, 0 replies; 15+ messages in thread
From: Mateo Lozano, Oscar @ 2014-05-09 12:04 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

Hi Mika,

> > +static bool rings_stopped(void)
> > +{
> > +	int fd;
> > +	static char buf[128];
> > +	unsigned long long val;
> > +
> > +	fd = igt_debugfs_open("i915_ring_stop", O_RDONLY);
> > +	igt_assert(fd >= 0);
> > +
> > +	igt_assert(read(fd, buf, sizeof(buf)) > 0);
> > +	close(fd);
> > +
> > +	sscanf(buf, "%llx", &val);
> > +
> > +	return (bool)val;
> > +}
> 
> Please use igt_set_stop_rings() and igt_get_stop_rings().
> 
> Also consider stopping only the ring you are testing for.

Oops, I didn see these before. Ok, done in the new version.

> > +			for (i = 0; i < tail / 4; i++) {
> > +				igt_assert(getline(&line, &line_size, file) > 0);
> > +				items = sscanf(line, "%08x :  %08x\n",
> > +						&offset, &command);
> > +				igt_assert(items == 2);
> > +				if ((command & 0x1F800000) == MI_BATCH_BUFFER_START) {
> > +					igt_assert(getline(&line, &line_size, file) > 0);
> > +					items = sscanf(line, "%08x :  %08x\n",
> > +							&offset, &expected_addr);
> > +					igt_assert(items == 2);
> 
> Should check for MAGIC_NUMBER here?

I'm doing it above already, inside bb_matched:

			for (i = 0; i < sizeof(batch) / 4; i++) {
				igt_assert(getline(&line, &line_size, file) > 0);
				snprintf(expected_line, sizeof(expected_line), "%08x :  %08x",
						4*i, batch[i]);
				igt_assert(strstr(line, expected_line));
			}

-- Oscar

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] tests/gem_error_capture: Initial testcase for error state capture/dump
  2014-04-15 19:38               ` Daniel Vetter
@ 2014-05-09 12:07                 ` Mateo Lozano, Oscar
  0 siblings, 0 replies; 15+ messages in thread
From: Mateo Lozano, Oscar @ 2014-05-09 12:07 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Tue, 2014-04-15 at 21:38 +0200, Daniel Vetter wrote:
> On Mon, Apr 14, 2014 at 01:03:58PM +0000, Mateo Lozano, Oscar wrote:
> > > I would add a little more smarts to both the kernel and error-decode.
> > > In the kernel, we can print the guilty request, which you can then use to
> > > confirm that it is yours. That seems to me to be a stronger validation of
> > > gem_error_capture, and a useful bit of information from hangstats that we do
> > > not expose currently.
> > 
> > That sounds good. I have to add a number of other things to
> > i915_gpu_error as part of the Execlists code, so I´ll add a "--- guilty
> > request" as well and resubmit this test together with the series.
> 
> If we want this much smarts then we need a properly hanging batch, e.g.
> like the looping batch used in gem_reset_stats.
> 
> The problem with that is that this will kill the gpu if reset doesn't work
> (i.e. gen2/3) so we need to skip this test there. Or maybe split things
> into 2 subtests and use the properly hanging batch only when we do the
> extended guilty testing under discussion here.
> 
> But in any case just checking that the batch is somewhere in the ring
> (properly masking of lower bits 0-11 ofc) and checking whether the batch
> is correctl dumped (with the magic value) would catch a lot of the
> past&present execbuf bugs - we've had issues with dumping fancy values of
> 0 a lot.
> 
> For the guilty stuff we have an extensive set of tests in gem_reset_stat
> using the reset stat ioctl already. And for the occasional "the hang
> detection logic is busted bug" I think nothing short of a human brain
> locking at the batch really helps. At least if we want to be somewhat
> platform agnostic ...
> 
> So imo the current level of checking loosk Good Enough. But I'm certainly
> not going to stop you ;-)

Ok, for the moment I'm happy that I can unblock Execlists. I don't mind
looking at the "--- guilty requests" in the future, but first I need to
get Execlists out of the way...

Cheers,
Oscar
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2014-05-09 12:07 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-11 12:59 [PATCH] tests/gem_error_capture: Initial testcase for error state capture/dump oscar.mateo
2014-04-11 13:09 ` Mateo Lozano, Oscar
2014-04-11 14:27   ` Daniel Vetter
2014-04-11 16:48 ` [PATCH v2] " oscar.mateo
2014-04-11 17:12   ` Chris Wilson
2014-04-14 10:01     ` Mateo Lozano, Oscar
2014-04-14 10:14       ` Chris Wilson
2014-04-14 10:23         ` Mateo Lozano, Oscar
2014-04-14 10:30           ` Chris Wilson
2014-04-14 13:03             ` Mateo Lozano, Oscar
2014-04-15 19:38               ` Daniel Vetter
2014-05-09 12:07                 ` Mateo Lozano, Oscar
2014-04-15 14:49   ` Mika Kuoppala
2014-05-09 12:04     ` Mateo Lozano, Oscar
2014-05-09 11:54   ` [PATCH] " oscar.mateo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.