All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH igt] lib: Add a GPU error detector
@ 2016-03-22 11:48 Chris Wilson
  0 siblings, 0 replies; only message in thread
From: Chris Wilson @ 2016-03-22 11:48 UTC (permalink / raw)
  To: intel-gfx

If we listen to the uevents from the kernel, we can detect when the GPU
hangs. Doing so requires forking a helper process that monitors the
uevents and sends a signal back to the parent when an error is reported.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 benchmarks/Makefile.am   |  2 +-
 debugger/Makefile.am     |  2 +-
 demos/Makefile.am        |  2 +-
 lib/Makefile.am          | 12 +++++--
 lib/igt_aux.c            | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_aux.h            |  3 ++
 tests/Makefile.am        |  3 +-
 tests/gem_exec_whisper.c |  4 +++
 tools/Makefile.am        |  2 +-
 9 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index c67f472..2c2d100 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -3,7 +3,7 @@ include Makefile.sources
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
 
 benchmarks_LTLIBRARIES = gem_exec_tracer.la
 gem_exec_tracer_la_LDFLAGS = -module -avoid-version -no-undefined
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
index 5a523f5..9d231d3 100644
--- a/debugger/Makefile.am
+++ b/debugger/Makefile.am
@@ -15,4 +15,4 @@ AM_CFLAGS = 			\
 	$(LIBUNWIND_CFLAGS)	\
 	$(CWARNFLAGS)
 
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/demos/Makefile.am b/demos/Makefile.am
index d18a705..e6fbb3b 100644
--- a/demos/Makefile.am
+++ b/demos/Makefile.am
@@ -4,4 +4,4 @@ bin_PROGRAMS = 				\
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS)
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = $(top_builddir)/lib/libintel_tools.la
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a8a1eb6..d2f2e16 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -15,12 +15,20 @@ if HAVE_VC4
 endif
 
 AM_CPPFLAGS = -I$(top_srcdir)
-AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
+AM_CFLAGS = $(CWARNFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(LIBUNWIND_CFLAGS) $(DEBUG_CFLAGS) \
 	    -DIGT_SRCDIR=\""$(abs_top_srcdir)/tests"\" \
 	    -DIGT_DATADIR=\""$(pkgdatadir)"\" \
 	    -DIGT_LOG_DOMAIN=\""$(subst _,-,$*)"\" \
 	    -pthread
 
-LDADD = $(CAIRO_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS)
 
+libintel_tools_la_LIBADD = \
+	$(DRM_LIBS) \
+	$(PCIACCESS_LIBS) \
+	$(CAIRO_LIBS) \
+	$(LIBUDEV_LIBS) \
+	$(LIBUNWIND_LIBS) \
+	$(TIMER_LIBS) \
+	-lm
+
diff --git a/lib/igt_aux.c b/lib/igt_aux.c
index 7deaf2f..d8f72fb 100644
--- a/lib/igt_aux.c
+++ b/lib/igt_aux.c
@@ -42,6 +42,7 @@
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
+#include <sys/poll.h>
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/types.h>
@@ -360,6 +361,87 @@ void igt_stop_signal_helper(void)
 	sig_stat = 0;
 }
 
+#if HAVE_UDEV
+#include <libudev.h>
+
+static struct igt_helper_process hang_detector; /* helper forked by igt_fork_hang_detector() */
+static void __attribute__((noreturn))
+hang_detector_process(pid_t pid, dev_t rdev) /* helper body: send SIGRTMAX to @pid on an ERROR=1 uevent from device @rdev; never returns */
+{
+	struct udev_monitor *mon =
+		udev_monitor_new_from_netlink(udev_new(), "kernel");
+	struct pollfd pfd;
+
+	if (mon == NULL ||
+	    udev_monitor_filter_add_match_subsystem_devtype(mon, "drm", NULL) < 0 ||
+	    udev_monitor_enable_receiving(mon) < 0)
+		exit(1); /* no usable monitor: do not sit in poll() pretending to watch */
+
+	pfd.fd = udev_monitor_get_fd(mon);
+	pfd.events = POLLIN;
+
+	while (poll(&pfd, 1, -1) > 0) {
+		struct udev_device *dev = udev_monitor_receive_device(mon);
+		const char *str;
+
+		if (dev == NULL)
+			break;
+
+		/* Only events whose device number matches @rdev are of interest. */
+		if (udev_device_get_devnum(dev) == rdev) {
+			str = udev_device_get_property_value(dev, "ERROR");
+			if (str && atoi(str) == 1)
+				kill(pid, SIGRTMAX);
+		}
+
+		udev_device_unref(dev);
+		if (kill(pid, 0)) /* Parent has died, so must we. */
+			break;
+	}
+
+	exit(0);
+}
+
+static void sig_abort(int sig) /* handler installed for SIGRTMAX by igt_fork_hang_detector(); @sig is unused */
+{
+	igt_assert(!"GPU hung"); /* !"GPU hung" is constant 0, so this assertion fails unconditionally */
+}
+
+void igt_fork_hang_detector(int fd) /* start the uevent-watching helper for the device node behind @fd */
+{
+	struct stat st;
+
+	if (igt_only_list_subtests()) /* nothing is forked when only listing subtests */
+		return;
+
+	igt_assert(fstat(fd, &st) == 0); /* st.st_rdev is the device number the helper matches uevents against */
+
+	signal(SIGRTMAX, sig_abort); /* handler is in place before the helper is forked */
+	igt_fork_helper(&hang_detector) /* NOTE(review): presumably the statement below runs in the forked helper, so getppid() is this process - confirm against igt_fork_helper */
+		hang_detector_process(getppid(), st.st_rdev);
+}
+
+void igt_stop_hang_detector(void) /* stop the helper started by igt_fork_hang_detector() */
+{
+	if (igt_only_list_subtests()) /* mirrors igt_fork_hang_detector(): no helper was forked in list mode */
+		return;
+
+	igt_stop_helper(&hang_detector);
+}
+#else
+void igt_fork_hang_detector(int fd) /* built without HAVE_UDEV: no detector available, @fd is unused */
+{
+	if (igt_only_list_subtests()) /* listing subtests must not trigger a skip */
+		return;
+
+	igt_skip("GPU hang detector requires udev support\n"); /* igt_skip() needs a printf-style reason string */
+}
+
+void igt_stop_hang_detector(void) /* built without HAVE_UDEV: this variant never forks a helper, so there is nothing to stop */
+{
+}
+#endif
+
 /**
  * igt_check_boolean_env_var:
  * @env_var: environment variable name
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9fade67..eee80ca 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -40,6 +40,9 @@ extern int num_trash_bos;
 void igt_fork_signal_helper(void);
 void igt_stop_signal_helper(void);
 
+void igt_fork_hang_detector(int fd);
+void igt_stop_hang_detector(void);
+
 struct igt_sigiter {
 	unsigned pass;
 };
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 839b37d..24d374a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -56,9 +56,8 @@ AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(DEBUG_CFLAGS)\
 	$(LIBUNWIND_CFLAGS) \
 	$(NULL)
 
-LDADD = ../lib/libintel_tools.la $(PCIACCESS_LIBS) $(DRM_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS)
+LDADD = ../lib/libintel_tools.la $(GLIB_LIBS)
 
-LDADD += $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(GLIB_LIBS) -lm
 AM_CFLAGS += $(CAIRO_CFLAGS) $(LIBUDEV_CFLAGS) $(GLIB_CFLAGS)
 AM_LDFLAGS = -Wl,--as-needed
 
diff --git a/tests/gem_exec_whisper.c b/tests/gem_exec_whisper.c
index b84f1a2..1991fed 100644
--- a/tests/gem_exec_whisper.c
+++ b/tests/gem_exec_whisper.c
@@ -368,6 +368,8 @@ igt_main
 	igt_fixture
 		fd = drm_open_driver_master(DRIVER_INTEL);
 
+	igt_fork_hang_detector(fd);
+
 	for (const struct mode *m = modes; m->name; m++)
 		igt_subtest_f("%s", *m->name ? m->name : "basic")
 			whisper(fd, -1, m->flags);
@@ -382,6 +384,8 @@ igt_main
 				whisper(fd, e->exec_id | e->flags, m->flags);
 	}
 
+	igt_stop_hang_detector();
+
 	igt_fixture
 		close(fd);
 }
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 74c5521..df48d94 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -4,7 +4,7 @@ SUBDIRS = null_state_gen registers
 
 AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
 AM_CFLAGS = $(DEBUG_CFLAGS) $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) $(LIBUNWIND_CFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\"
-LDADD = $(top_builddir)/lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS) $(CAIRO_LIBS) $(LIBUDEV_LIBS) $(LIBUNWIND_LIBS) $(TIMER_LIBS) -lm
+LDADD = $(top_builddir)/lib/libintel_tools.la
 AM_LDFLAGS = -Wl,--as-needed
 
 
-- 
2.8.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2016-03-22 11:48 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-22 11:48 [PATCH igt] lib: Add a GPU error detector Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.