All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH i-g-t 2/3] i915: Exercise sysfs heartbeat controls
Date: Sat, 19 Oct 2019 11:04:38 +0100	[thread overview]
Message-ID: <20191019100439.24640-2-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20191019100439.24640-1-chris@chris-wilson.co.uk>

We [will] expose various per-engine scheduling controls. One of them,
'heartbeat_interval_ms', defines how often we send a heartbeat down the
engine to check upon the health of the engine. If a heartbeat does not
complete within the interval (or two), the engine is declared hung.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.sources                |   1 +
 tests/i915/sysfs_heartbeat_interval.c | 434 ++++++++++++++++++++++++++
 tests/meson.build                     |   1 +
 3 files changed, 436 insertions(+)
 create mode 100644 tests/i915/sysfs_heartbeat_interval.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index dff7dac06..4494d2c05 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -98,6 +98,7 @@ TESTS_progs = \
 	tools_test \
 	vgem_basic \
 	vgem_slow \
+	i915/sysfs_heartbeat_interval \
 	i915/sysfs_preemption_timeout \
 	$(NULL)
 
diff --git a/tests/i915/sysfs_heartbeat_interval.c b/tests/i915/sysfs_heartbeat_interval.c
new file mode 100644
index 000000000..d0d525489
--- /dev/null
+++ b/tests/i915/sysfs_heartbeat_interval.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "drmtest.h" /* gem_quiescent_gpu()! */
+#include "i915/gem_engine_topology.h"
+#include "igt_dummyload.h"
+#include "igt_sysfs.h"
+#include "ioctl_wrappers.h" /* igt_require_gem()! */
+#include "sw_sync.h"
+
+#include "igt_debugfs.h"
+
+/*
+ * Write "1" or "0" to the enable_hangcheck attribute found under @dir and
+ * hand back whatever igt_sysfs_set() reports.
+ */
+static bool __enable_hangcheck(int dir, bool state)
+{
+	const char *value = "0";
+
+	if (state)
+		value = "1";
+
+	return igt_sysfs_set(dir, "enable_hangcheck", value);
+}
+
+/*
+ * Set the enable_hangcheck parameter for the device behind @i915.
+ * If the parameters directory cannot be opened nothing is written.
+ */
+static void enable_hangcheck(int i915, bool state)
+{
+	int params = igt_sysfs_open_parameters(i915);
+
+	/* no parameters, must be default! */
+	if (params >= 0) {
+		__enable_hangcheck(params, state);
+		close(params);
+	}
+}
+
+/*
+ * Program heartbeat_interval_ms on @engine (an fd for the engine's sysfs
+ * directory) to @value and confirm the setting by reading it back.
+ */
+static void set_heartbeat(int engine, unsigned int value)
+{
+	unsigned int delay = ~value; /* can never match @value by accident */
+
+	igt_sysfs_printf(engine, "heartbeat_interval_ms", "%u", value);
+
+	/* A failed read must be reported, not compared as stale data */
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &delay) == 1);
+	igt_assert_eq(delay, value);
+}
+
+/*
+ * Check that heartbeat_interval_ms can be changed and then put back:
+ * read the current value, set it to 1 and then set it back to what was
+ * read. set_heartbeat() compares each write against a read-back.
+ */
+static void test_idempotent(int i915, int engine)
+{
+	unsigned int saved;
+
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &saved) == 1);
+	igt_debug("Initial heartbeat_interval_ms:%u\n", saved);
+
+	set_heartbeat(engine, 1);
+	set_heartbeat(engine, saved); /* leave the engine as we found it */
+}
+
+/*
+ * Check that values which do not fit the attribute are rejected: after
+ * each attempted write, heartbeat_interval_ms must still read back as the
+ * value it had on entry.
+ */
+static void test_invalid(int i915, int engine)
+{
+	unsigned int saved, delay;
+
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &saved) == 1);
+	igt_debug("Initial heartbeat_interval_ms:%u\n", saved);
+
+	/*
+	 * PRIu64 is only the length modifier and conversion; it needs the
+	 * leading '%' and a genuinely 64-bit argument, otherwise we write
+	 * the literal letters instead of the out-of-range number.
+	 */
+	delay = ~saved;
+	igt_sysfs_printf(engine, "heartbeat_interval_ms", "%" PRIu64, (uint64_t)-1);
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &delay) == 1);
+	igt_assert_eq(delay, saved);
+
+	/* A value whose low 32 bits are zero but which exceeds 32 bits */
+	delay = ~saved;
+	igt_sysfs_printf(engine, "heartbeat_interval_ms", "%" PRIu64, (uint64_t)10 << 32);
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &delay) == 1);
+	igt_assert_eq(delay, saved);
+}
+
+/*
+ * Apply I915_CONTEXT_PARAM_BANNABLE to context @ctx with every other
+ * field of the parameter block left zeroed; the call must succeed.
+ */
+static void set_unbannable(int i915, uint32_t ctx)
+{
+	struct drm_i915_gem_context_param p = {};
+
+	p.ctx_id = ctx;
+	p.param = I915_CONTEXT_PARAM_BANNABLE;
+
+	igt_assert_eq(__gem_context_set_param(i915, &p), 0);
+}
+
+/*
+ * Build a context bound to engine (@class, @inst), mark it unbannable,
+ * give it priority @prio and return its id.
+ */
+static uint32_t create_context(int i915, unsigned int class, unsigned int inst, int prio)
+{
+	const uint32_t id = gem_context_create_for_engine(i915, class, inst);
+
+	set_unbannable(i915, id);
+	gem_context_set_priority(i915, id, prio);
+
+	return id;
+}
+
+/*
+ * Set heartbeat_interval_ms to @timeout and measure how long a second,
+ * lowest-priority spinner has to wait behind a non-preemptible,
+ * highest-priority spinner on the same engine before it starts running.
+ *
+ * Returns the measured wait as reported by igt_nsec_elapsed() (callers
+ * treat it as nanoseconds). The first spinner's fence is then required to
+ * have completed with -EIO.
+ */
+static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
+{
+	unsigned int class, inst;
+	struct timespec ts = {};
+	igt_spin_t *spin[2];
+	uint64_t elapsed;
+	uint32_t ctx[2];
+
+	/* Identify the engine from its sysfs directory */
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	set_heartbeat(engine, timeout);
+
+	/* The hog: top priority, not preemptible, and already on the GPU */
+	ctx[0] = create_context(i915, class, inst, 1023);
+	spin[0] = igt_spin_new(i915, ctx[0],
+			       .flags = (IGT_SPIN_NO_PREEMPTION |
+					 IGT_SPIN_POLL_RUN |
+					 IGT_SPIN_FENCE_OUT));
+	igt_spin_busywait_until_started(spin[0]);
+
+	/* The victim: bottom priority; time from submission until it runs */
+	ctx[1] = create_context(i915, class, inst, -1023);
+	igt_nsec_elapsed(&ts);
+	spin[1] = igt_spin_new(i915, ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	igt_spin_busywait_until_started(spin[1]);
+	elapsed = igt_nsec_elapsed(&ts);
+
+	igt_spin_free(i915, spin[1]);
+
+	/* By now the hog's fence must already be signaled, and with an error */
+	igt_assert_eq(sync_fence_wait(spin[0]->out_fence, 1), 0);
+	igt_assert_eq(sync_fence_status(spin[0]->out_fence), -EIO);
+
+	igt_spin_free(i915, spin[0]);
+
+	gem_context_destroy(i915, ctx[1]);
+	gem_context_destroy(i915, ctx[0]);
+	gem_quiescent_gpu(i915);
+
+	return elapsed;
+}
+
+/*
+ * With preempt_timeout_ms set to 1 (the test is skipped if that write does
+ * not report 1), run __test_timeout() for a range of heartbeat intervals
+ * and require the measured wait to stay below 3 * interval + 150ms.
+ * The original heartbeat interval is restored at the end.
+ */
+static void test_precise(int i915, int engine)
+{
+	int delays[] = { 1, 50, 100, 500 };
+	unsigned int saved;
+
+	igt_require(igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 1) == 1);
+
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &saved) == 1);
+	igt_debug("Initial heartbeat_interval_ms:%u\n", saved);
+	gem_quiescent_gpu(i915);
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++) {
+		uint64_t elapsed;
+
+		elapsed = __test_timeout(i915, engine, delays[i]);
+		/* elapsed is in ns: print it both as fractional and whole ms */
+		igt_info("heartbeat_interval_ms:%d, elapsed=%.3fms[%d]\n",
+			 delays[i], elapsed * 1e-6,
+				(int)(elapsed / 1000 / 1000)
+			 );
+
+		/*
+		 * It takes a couple of missed heartbeats before we start
+		 * terminating hogs, and a little bit of jiffy slack for
+		 * scheduling at each step. 150ms should cover all of our
+		 * sins and be a useful tolerance.
+		 */
+		igt_assert_f(elapsed / 1000 / 1000 < 3 * delays[i] + 150,
+			     "Heartbeat interval (and CPR) exceeded request!\n");
+	}
+
+	gem_quiescent_gpu(i915);
+	set_heartbeat(engine, saved);
+}
+
+/*
+ * Same measurement as test_precise(), but with preempt_timeout_ms written
+ * as 0 first; the result of that write is not checked. The measured wait
+ * must stay below 5 * interval + 150ms. The original heartbeat interval
+ * is restored at the end.
+ */
+static void test_nopreempt(int i915, int engine)
+{
+	int delays[] = { 1, 50, 100, 500 };
+	unsigned int saved;
+
+	/* Test heartbeats with forced preemption disabled */
+	igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 0);
+
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &saved) == 1);
+	igt_debug("Initial heartbeat_interval_ms:%u\n", saved);
+	gem_quiescent_gpu(i915);
+
+	for (int i = 0; i < ARRAY_SIZE(delays); i++) {
+		uint64_t elapsed;
+
+		elapsed = __test_timeout(i915, engine, delays[i]);
+		/* elapsed is in ns: print it both as fractional and whole ms */
+		igt_info("heartbeat_interval_ms:%d, elapsed=%.3fms[%d]\n",
+			 delays[i], elapsed * 1e-6,
+				(int)(elapsed / 1000 / 1000)
+			 );
+
+		/*
+		 * It takes several missed heartbeats before we start
+		 * terminating hogs, and a little bit of jiffy slack for
+		 * scheduling at each step. 150ms should cover all of our
+		 * sins and be a useful tolerance.
+		 *
+		 * NOTE(review): this comment used to quote 250ms while the
+		 * assertion below allows 150ms -- confirm which is intended.
+		 */
+		igt_assert_f(elapsed / 1000 / 1000 < 5 * delays[i] + 150,
+			     "Heartbeat interval (and CPR) exceeded request!\n");
+	}
+
+	gem_quiescent_gpu(i915);
+	set_heartbeat(engine, saved);
+}
+
+/*
+ * Body of a forked client: keep submitting non-preemptible spinners on
+ * @engine until *@ctl becomes non-zero.
+ *
+ * @ctl:      flag polled before every spinner; set by the parent to stop
+ * @duration: per-spinner timeout in ms (scaled by 10^6 before being handed
+ *            to igt_spin_set_timeout())
+ * @expect:   fence status every spinner must finish with
+ *
+ * The number of completed spinners is logged on exit.
+ */
+static void client(int i915, int engine, int *ctl, int duration, int expect)
+{
+	unsigned int class, inst;
+	unsigned long count = 0;
+	uint32_t ctx;
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	ctx = create_context(i915, class, inst, 0);
+
+	while (!READ_ONCE(*ctl)) {
+		igt_spin_t *spin;
+
+		spin = igt_spin_new(i915, ctx,
+				    .flags = (IGT_SPIN_NO_PREEMPTION |
+					      IGT_SPIN_POLL_RUN |
+					      IGT_SPIN_FENCE_OUT));
+		igt_spin_busywait_until_started(spin);
+
+		igt_spin_set_timeout(spin, (uint64_t)duration * 1000 * 1000);
+		sync_fence_wait(spin->out_fence, -1); /* no timeout */
+
+		igt_assert_eq(sync_fence_status(spin->out_fence), expect);
+
+		/*
+		 * Release the spinner before creating the next one; this
+		 * loop may run for hours and must not accumulate them.
+		 */
+		igt_spin_free(i915, spin);
+		count++;
+	}
+
+	gem_context_destroy(i915, ctx);
+	igt_info("%s client completed %lu spins\n",
+		 expect < 0 ? "Bad" : "Good", count);
+}
+
+/*
+ * Empty signal handler. wait_until() installs it for SIGCHLD so that the
+ * signal is delivered to a handler without doing anything else.
+ */
+static void sigign(int sig)
+{
+}
+
+/*
+ * Sleep for up to @duration seconds. A no-op SIGCHLD handler is installed
+ * for the duration of the sleep so that a child exiting early interrupts
+ * sleep() instead of leaving the parent asleep for the full period.
+ */
+static void wait_until(int duration)
+{
+	void (*old)(int);
+
+	old = signal(SIGCHLD, sigign);
+	sleep(duration);
+
+	/*
+	 * Put back whatever was installed before rather than SIG_IGN: with
+	 * SIGCHLD explicitly ignored, terminated children are reaped
+	 * automatically and wait() fails with ECHILD, so the caller could
+	 * no longer collect the exit status of its children.
+	 */
+	if (old != SIG_ERR)
+		signal(SIGCHLD, old);
+}
+
+/*
+ * Run one well-behaved and one hogging client side by side on @engine.
+ *
+ * @heartbeat: heartbeat_interval_ms to use for the run
+ * @good:      spinner timeout (ms) of the client expected to finish with
+ *             fence status 1
+ * @bad:       spinner timeout (ms) of the client expected to finish with
+ *             fence status -EIO
+ * @duration:  how long to let the clients run, in seconds
+ *
+ * The original heartbeat interval is restored afterwards.
+ */
+static void __test_mixed(int i915, int engine,
+			 int heartbeat,
+			 int good,
+			 int bad,
+			 int duration)
+{
+	unsigned int saved;
+	int *shared;
+
+	/*
+	 * Given two clients of which one is a hog, be sure we cleanly
+	 * terminate the hog leaving the good client to run.
+	 */
+
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &saved) == 1);
+	igt_debug("Initial heartbeat_interval_ms:%u\n", saved);
+	gem_quiescent_gpu(i915);
+
+	/*
+	 * One shared page carries the stop flag to the children. They read
+	 * it, so ask for read access explicitly instead of relying on
+	 * PROT_WRITE implying it.
+	 */
+	shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(shared != MAP_FAILED);
+
+	set_heartbeat(engine, heartbeat);
+
+	igt_fork(child, 1) /* good client */
+		client(i915, engine, shared, good, 1);
+	igt_fork(child, 1) /* bad client */
+		client(i915, engine, shared, bad, -EIO);
+
+	wait_until(duration);
+
+	*shared = true; /* tell both clients to stop */
+	igt_waitchildren();
+	munmap(shared, 4096);
+
+	gem_quiescent_gpu(i915);
+	set_heartbeat(engine, saved);
+}
+
+/*
+ * Short mixed-client run: preempt_timeout_ms is written as 1, then a good
+ * and a bad client share the engine for a few seconds.
+ */
+static void test_mixed(int i915, int engine)
+{
+	const int heartbeat_ms = 10;
+	const int good_ms = 10;
+	const int bad_ms = 100;
+	const int duration_s = 5;
+
+	igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 1);
+	__test_mixed(i915, engine, heartbeat_ms, good_ms, bad_ms, duration_s);
+}
+
+/*
+ * Very long mixed-client run: preempt_timeout_ms is written as 0, then a
+ * good and a bad client share the engine with minute-scale intervals.
+ */
+static void test_long(int i915, int engine)
+{
+	const int heartbeat_ms = 60 * 1000; /* 60s */
+	const int good_ms = 60 * 1000; /* 60s */
+	const int bad_ms = 300 * 1000; /* 5min */
+	const int duration_s = 24 * 3600; /* 24hours */
+
+	igt_sysfs_printf(engine, "preempt_timeout_ms", "%u", 0);
+	__test_mixed(i915, engine, heartbeat_ms, good_ms, bad_ms, duration_s);
+}
+
+/*
+ * Check that writing 0 to heartbeat_interval_ms turns the heartbeat off.
+ *
+ * With the interval at 0 (the test is skipped if that value does not read
+ * back), a non-preemptible spinner is polled once a second for 150
+ * iterations and its fence status must stay 0 throughout. The interval is
+ * then set to 1 and the spinner's fence must signal with -EIO within the
+ * 250 passed to sync_fence_wait(). Finally the original interval is
+ * restored.
+ */
+static void test_off(int i915, int engine)
+{
+	unsigned int class, inst;
+	unsigned int saved, delay;
+	igt_spin_t *spin;
+	uint32_t ctx;
+
+	igt_assert(igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &saved) == 1);
+	igt_debug("Initial heartbeat_interval_ms:%u\n", saved);
+	gem_quiescent_gpu(i915);
+
+	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
+	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
+
+	/* Preset to a value that cannot be mistaken for a successful read */
+	delay = -1;
+	igt_sysfs_printf(engine, "heartbeat_interval_ms", "%u", 0);
+	igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &delay);
+	igt_require(delay == 0);
+
+	ctx = create_context(i915, class, inst, 0);
+
+	spin = igt_spin_new(i915, ctx,
+			    .flags = (IGT_SPIN_POLL_RUN |
+				      IGT_SPIN_NO_PREEMPTION |
+				      IGT_SPIN_FENCE_OUT));
+	igt_spin_busywait_until_started(spin);
+
+	/* With no heartbeat the spinner must be left alone */
+	for (int i = 0; i < 150; i++) {
+		igt_assert_eq(sync_fence_status(spin->out_fence), 0);
+		sleep(1);
+	}
+
+	/* Turn the heartbeat back on: now the spinner must be removed */
+	delay = -1;
+	igt_sysfs_printf(engine, "heartbeat_interval_ms", "%u", 1);
+	igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &delay);
+	igt_assert_eq(delay, 1);
+
+	igt_assert_eq(sync_fence_wait(spin->out_fence, 250), 0);
+	igt_assert_eq(sync_fence_status(spin->out_fence), -EIO);
+
+	igt_spin_free(i915, spin);
+	/* Do not leave the context behind for the following subtests */
+	gem_context_destroy(i915, ctx);
+
+	gem_quiescent_gpu(i915);
+	igt_sysfs_printf(engine, "heartbeat_interval_ms", "%u", saved);
+	igt_sysfs_scanf(engine, "heartbeat_interval_ms", "%u", &delay);
+	igt_assert_eq(delay, saved);
+}
+
+/*
+ * Test entry point: open the device and its sysfs "engine" directory, then
+ * register one group of subtests per statically known engine, each named
+ * after the engine's sysfs "name" attribute.
+ */
+igt_main
+{
+	const struct intel_execution_engine2 *it;
+	int i915 = -1, engines = -1;
+
+	igt_fixture {
+		int sys;
+
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		sys = igt_sysfs_open(i915);
+		igt_require(sys != -1);
+
+		/* Keep only the "engine" subdirectory open */
+		engines = openat(sys, "engine", O_RDONLY);
+		igt_require(engines != -1);
+		close(sys);
+
+		enable_hangcheck(i915, true);
+	}
+
+	__for_each_static_engine(it) {
+		igt_subtest_group {
+			int engine = -1;
+			char *name = NULL;
+
+			igt_fixture {
+				struct stat st;
+
+				/* Only engines exposing the attribute are tested */
+				engine = openat(engines, it->name, O_RDONLY);
+				igt_require(fstatat(engine,
+						    "heartbeat_interval_ms",
+						    &st, 0) == 0);
+				name = igt_sysfs_get(engine, "name");
+			}
+			/*
+			 * NOTE(review): this continue also skips the
+			 * free(name)/close(engine) at the end of the group;
+			 * if engine was opened but name was never set, that
+			 * fd is not closed here -- confirm this is intended.
+			 */
+			if (!name)
+				continue;
+
+			igt_subtest_f("%s-idempotent", name)
+				test_idempotent(i915, engine);
+			igt_subtest_f("%s-invalid", name)
+				test_invalid(i915, engine);
+
+			igt_subtest_f("%s-precise", name)
+				test_precise(i915, engine);
+			igt_subtest_f("%s-nopreempt", name)
+				test_nopreempt(i915, engine);
+			igt_subtest_f("%s-mixed", name)
+				test_mixed(i915, engine);
+			igt_subtest_f("%s-off", name)
+				test_off(i915, engine);
+			igt_subtest_f("%s-long", name)
+				test_long(i915, engine);
+
+			free(name);
+			close(engine);
+		}
+	}
+
+	igt_fixture {
+		close(engines);
+		close(i915);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index a699377e3..23f730c20 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -238,6 +238,7 @@ i915_progs = [
 	'i915_query',
 	'i915_selftest',
 	'i915_suspend',
+	'sysfs_heartbeat_interval',
 	'sysfs_preemption_timeout',
 ]
 
-- 
2.24.0.rc0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2019-10-19 10:05 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-19 10:04 [PATCH i-g-t 1/3] i915: Exercise preemption timeout controls in sysfs Chris Wilson
2019-10-19 10:04 ` Chris Wilson [this message]
2019-10-19 10:04 ` [PATCH i-g-t 3/3] i915: Exercise timeslice sysfs property Chris Wilson
2019-10-19 10:14 ` ✗ GitLab.Pipeline: warning for series starting with [1/3] i915: Exercise preemption timeout controls in sysfs Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2019-10-18 13:38 sysfs property tests Chris Wilson
2019-10-18 13:38 ` [PATCH i-g-t 2/3] i915: Exercise sysfs heartbeat controls Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191019100439.24640-2-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.