All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Subject: [PATCH i-g-t 5/5] igt: Add gem_exec_balancer
Date: Thu, 20 Sep 2018 11:26:12 +0100	[thread overview]
Message-ID: <20180920102612.15291-5-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20180920102612.15291-1-chris@chris-wilson.co.uk>

Exercise the in-kernel load balancer checking that we can distribute
batches across the set of ctx->engines to avoid load.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.am         |   1 +
 tests/Makefile.sources    |   1 +
 tests/gem_exec_balancer.c | 469 ++++++++++++++++++++++++++++++++++++++
 tests/meson.build         |   7 +
 4 files changed, 478 insertions(+)
 create mode 100644 tests/gem_exec_balancer.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index ee5a7c5e8..71cdcaaaa 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -107,6 +107,7 @@ gem_close_race_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_close_race_LDADD = $(LDADD) -lpthread
 gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_ctx_thrash_LDADD = $(LDADD) -lpthread
+gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_exec_parallel_LDADD = $(LDADD) -lpthread
 gem_fence_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 4e42a4f05..70a144798 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -67,6 +67,7 @@ TESTS_progs = \
 	gem_exec_async \
 	gem_exec_await \
 	gem_exec_bad_domains \
+	gem_exec_balancer \
 	gem_exec_basic \
 	gem_exec_big \
 	gem_exec_blt \
diff --git a/tests/gem_exec_balancer.c b/tests/gem_exec_balancer.c
new file mode 100644
index 000000000..954696cb8
--- /dev/null
+++ b/tests/gem_exec_balancer.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define I915_CONTEXT_PARAM_ENGINES 0x7
+
+struct class_instance {
+	uint32_t class;
+	uint32_t instance;
+};
+#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
+
+static bool has_class_instance(int i915, uint32_t class, uint32_t instance)
+{
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+	if (fd != -1) {
+		close(fd);
+		return true;
+	}
+
+	return false;
+}
+
+static struct class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+	unsigned int count = 0, size = 64;
+	struct class_instance *engines;
+
+	engines = malloc(size * sizeof(*engines));
+	if (!engines) {
+		*out = 0;
+		return NULL;
+	}
+
+	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+	     class_mask;
+	     class++, class_mask >>= 1) {
+		if (!(class_mask & 1))
+			continue;
+
+		for (unsigned int instance = 0;
+		     instance < INSTANCE_COUNT;
+		     instance++) {
+		     if (!has_class_instance(i915, class, instance))
+			     continue;
+
+			if (count == size) {
+				struct class_instance *e;
+
+				size *= 2;
+				e = realloc(engines, size*sizeof(*engines));
+				if (!e) {
+					*out = count;
+					return engines;
+				}
+
+				engines = e;
+			}
+
+			engines[count++] = (struct class_instance){
+				.class = class,
+				.instance = instance,
+			};
+		}
+	}
+
+	if (!count) {
+		free(engines);
+		engines = NULL;
+	}
+
+	*out = count;
+	return engines;
+}
+
+static int __set_load_balancer(int i915, uint32_t ctx,
+			       const struct class_instance *ci,
+			       unsigned int count)
+{
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+	struct balancer {
+		uint64_t next_extension;
+		uint64_t name;
+
+		uint64_t flags;
+		uint64_t mask;
+
+		uint64_t mbz[4];
+	} balancer = {
+		.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+		.mask = ~0ull,
+	};
+	struct engines {
+		uint64_t extension;
+		uint64_t class_instance[count];
+	} engines;
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.size = sizeof(engines),
+		.value = to_user_pointer(&engines)
+	};
+
+	engines.extension = to_user_pointer(&balancer);
+	memcpy(engines.class_instance, ci, sizeof(engines.class_instance));
+
+	return __gem_context_set_param(i915, &p);
+}
+
+static void set_load_balancer(int i915, uint32_t ctx,
+			      const struct class_instance *ci,
+			      unsigned int count)
+{
+	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+}
+
+static uint32_t load_balancer_create(int i915,
+				     const struct class_instance *ci,
+				     unsigned int count)
+{
+	uint32_t ctx;
+
+	ctx = gem_queue_create(i915);
+	set_load_balancer(i915, ctx, ci, count);
+
+	return ctx;
+}
+
+static void kick_kthreads(int period_us)
+{
+	sched_yield();
+	usleep(period_us);
+}
+
+static double measure_load(int pmu, int period_us)
+{
+	uint64_t data[2];
+	uint64_t d_t, d_v;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v = -data[0];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v += data[0];
+	d_t += data[1];
+
+	return d_v / (double)d_t;
+}
+
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+	uint64_t data[2 + num];
+	uint64_t d_t, d_v[num];
+	uint64_t min = -1, max = 0;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	for (unsigned int n = 0; n < num; n++)
+		d_v[n] = -data[2 + n];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+	d_t += data[1];
+	for (unsigned int n = 0; n < num; n++) {
+		d_v[n] += data[2 + n];
+		igt_debug("engine[%d]: %.1f%%\n",
+			  n, d_v[n] / (double)d_t * 100);
+		if (d_v[n] < min)
+			min = d_v[n];
+		if (d_v[n] > max)
+			max = d_v[n];
+	}
+
+	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+		  d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
+
+	return min / (double)d_t;
+}
+
+static void check_individual_engine(int i915,
+				    uint32_t ctx,
+				    const struct class_instance *ci,
+				    int idx)
+{
+	igt_spin_t *spin;
+	double load;
+	int pmu;
+
+	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class,
+						  ci[idx].instance));
+
+	spin = igt_spin_batch_new(i915, .ctx = ctx, .engine = idx + 1);
+	load = measure_load(pmu, 10000);
+	igt_spin_batch_free(i915, spin);
+
+	close(pmu);
+
+	igt_assert_f(load > 0.90,
+		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+		     idx, ci[idx].class, ci[idx].instance, load*100);
+}
+
+static void individual(int i915)
+{
+	uint32_t ctx;
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
+	 * supplied array from gem_execbuf(). Our check is to build the
+	 * ctx->engine[] with various different engine classes, feed in
+	 * a spinner and then ask pmu to confirm it the expected engine
+	 * was busy.
+	 */
+
+	ctx = gem_queue_create(i915);
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		for (int pass = 0; pass < count; pass++) { /* approx. count! */
+			igt_permute_array(ci, count, igt_exchange_int64);
+			set_load_balancer(i915, ctx, ci, count);
+			for (unsigned int n = 0; n < count; n++)
+				check_individual_engine(i915, ctx, ci, n);
+		}
+
+		free(ci);
+	}
+
+	gem_context_destroy(i915, ctx);
+}
+
+static int add_pmu(int pmu, const struct class_instance *ci)
+{
+	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->class,
+							 ci->instance),
+				    pmu);
+}
+
+static uint32_t batch_create(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, 4096);
+	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	if (flags & LATE)
+		igt_require_sw_sync();
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
+	 * to provide an automatic selection from the ctx->engine[]. It
+	 * employs load-balancing to evenly distribute the workload the
+	 * array. If we submit N spinners, we expect them to be simultaneously
+	 * running across N engines and use PMU to confirm that the entire
+	 * set of engines are busy.
+	 *
+	 * We complicate matters by interpersing shortlived tasks to challenge
+	 * the kernel to search for space in which to insert new batches.
+	 */
+
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct class_instance *ci;
+		igt_spin_t *spin = NULL;
+		unsigned int count;
+		IGT_CORK_FENCE(cork);
+		double load;
+		int fence = -1;
+		int *pmu;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		pmu = malloc(sizeof(*pmu) * count);
+		igt_assert(pmu);
+
+		if (flags & LATE)
+			fence = igt_cork_plug(&cork, i915);
+
+		pmu[0] = -1;
+		for (unsigned int n = 0; n < count; n++) {
+			uint32_t ctx;
+
+			pmu[n] = add_pmu(pmu[0], &ci[n]);
+
+			if (flags & PULSE) {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&batch),
+					.buffer_count = 1,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+
+				gem_execbuf(i915, &eb);
+			}
+
+			/*
+			 * Each spinner needs to be one a new timeline,
+			 * otherwise they will just sit in the single queue
+			 * and not run concurrently.
+			 */
+			ctx = load_balancer_create(i915, ci, count);
+
+			if (spin == NULL) {
+				spin = __igt_spin_batch_new(i915, ctx, 0, 0);
+			} else {
+				struct drm_i915_gem_exec_object2 obj = {
+					.handle = spin->handle,
+				};
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&obj),
+					.buffer_count = 1,
+					.rsvd1 = ctx,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+
+				gem_execbuf(i915, &eb);
+			}
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		if (flags & LATE) {
+			igt_cork_unplug(&cork);
+			close(fence);
+		}
+
+		load = measure_min_load(pmu[0], count, 10000);
+		igt_spin_batch_free(i915, spin);
+
+		close(pmu[0]);
+		free(pmu);
+
+		free(ci);
+
+		igt_assert_f(load > 0.90,
+			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
+			     count, mask, load*100);
+	}
+
+	gem_close(i915, batch.handle);
+}
+
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+
+	return __gem_context_set_param(i915, &p) == 0;
+}
+
+static bool has_load_balancer(int i915)
+{
+	struct class_instance ci = {};
+	uint32_t ctx;
+	int err;
+
+	ctx = gem_queue_create(i915);
+	err = __set_load_balancer(i915, ctx, &ci, 1);
+	gem_context_destroy(i915, ctx);
+
+	return err == 0;
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+		igt_require(has_load_balancer(i915));
+
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("individual")
+		individual(i915);
+
+	igt_subtest_group {
+		static const struct {
+			const char *name;
+			unsigned int flags;
+		} phases[] = {
+			{ "", 0 },
+			{ "-pulse", PULSE },
+			{ "-late", LATE },
+			{ "-late-pulse", PULSE | LATE },
+			{ }
+		};
+		for (typeof(*phases) *p = phases; p->name; p++)
+			igt_subtest_f("full%s", p->name)
+				full(i915, p->flags);
+	}
+
+	igt_fixture {
+		igt_stop_hang_detector();
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index b6da4f479..420d626fb 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -256,6 +256,13 @@ test_executables += executable('gem_eio', 'gem_eio.c',
 	   install : true)
 test_progs += 'gem_eio'
 
+test_executables += executable('gem_exec_balancer', 'gem_exec_balancer.c',
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_progs += 'gem_exec_balancer'
+
 test_executables += executable('gem_mocs_settings', 'gem_mocs_settings.c',
 	   dependencies : test_deps + [ lib_igt_perf ],
 	   install_dir : libexecdir,
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

WARNING: multiple messages have this Message-ID (diff)
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Subject: [igt-dev] [PATCH i-g-t 5/5] igt: Add gem_exec_balancer
Date: Thu, 20 Sep 2018 11:26:12 +0100	[thread overview]
Message-ID: <20180920102612.15291-5-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20180920102612.15291-1-chris@chris-wilson.co.uk>

Exercise the in-kernel load balancer checking that we can distribute
batches across the set of ctx->engines to avoid load.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.am         |   1 +
 tests/Makefile.sources    |   1 +
 tests/gem_exec_balancer.c | 469 ++++++++++++++++++++++++++++++++++++++
 tests/meson.build         |   7 +
 4 files changed, 478 insertions(+)
 create mode 100644 tests/gem_exec_balancer.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index ee5a7c5e8..71cdcaaaa 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -107,6 +107,7 @@ gem_close_race_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_close_race_LDADD = $(LDADD) -lpthread
 gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_ctx_thrash_LDADD = $(LDADD) -lpthread
+gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_exec_parallel_LDADD = $(LDADD) -lpthread
 gem_fence_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 4e42a4f05..70a144798 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -67,6 +67,7 @@ TESTS_progs = \
 	gem_exec_async \
 	gem_exec_await \
 	gem_exec_bad_domains \
+	gem_exec_balancer \
 	gem_exec_basic \
 	gem_exec_big \
 	gem_exec_blt \
diff --git a/tests/gem_exec_balancer.c b/tests/gem_exec_balancer.c
new file mode 100644
index 000000000..954696cb8
--- /dev/null
+++ b/tests/gem_exec_balancer.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sched.h>
+
+#include "igt.h"
+#include "igt_perf.h"
+#include "i915/gem_ring.h"
+#include "sw_sync.h"
+
+IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
+
+#define I915_CONTEXT_PARAM_ENGINES 0x7
+
+struct class_instance {
+	uint32_t class;
+	uint32_t instance;
+};
+#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
+
+static bool has_class_instance(int i915, uint32_t class, uint32_t instance)
+{
+	int fd;
+
+	fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+	if (fd != -1) {
+		close(fd);
+		return true;
+	}
+
+	return false;
+}
+
+static struct class_instance *
+list_engines(int i915, uint32_t class_mask, unsigned int *out)
+{
+	unsigned int count = 0, size = 64;
+	struct class_instance *engines;
+
+	engines = malloc(size * sizeof(*engines));
+	if (!engines) {
+		*out = 0;
+		return NULL;
+	}
+
+	for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
+	     class_mask;
+	     class++, class_mask >>= 1) {
+		if (!(class_mask & 1))
+			continue;
+
+		for (unsigned int instance = 0;
+		     instance < INSTANCE_COUNT;
+		     instance++) {
+		     if (!has_class_instance(i915, class, instance))
+			     continue;
+
+			if (count == size) {
+				struct class_instance *e;
+
+				size *= 2;
+				e = realloc(engines, size*sizeof(*engines));
+				if (!e) {
+					*out = count;
+					return engines;
+				}
+
+				engines = e;
+			}
+
+			engines[count++] = (struct class_instance){
+				.class = class,
+				.instance = instance,
+			};
+		}
+	}
+
+	if (!count) {
+		free(engines);
+		engines = NULL;
+	}
+
+	*out = count;
+	return engines;
+}
+
+static int __set_load_balancer(int i915, uint32_t ctx,
+			       const struct class_instance *ci,
+			       unsigned int count)
+{
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+	struct balancer {
+		uint64_t next_extension;
+		uint64_t name;
+
+		uint64_t flags;
+		uint64_t mask;
+
+		uint64_t mbz[4];
+	} balancer = {
+		.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+		.mask = ~0ull,
+	};
+	struct engines {
+		uint64_t extension;
+		uint64_t class_instance[count];
+	} engines;
+	struct drm_i915_gem_context_param p = {
+		.ctx_id = ctx,
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.size = sizeof(engines),
+		.value = to_user_pointer(&engines)
+	};
+
+	engines.extension = to_user_pointer(&balancer);
+	memcpy(engines.class_instance, ci, sizeof(engines.class_instance));
+
+	return __gem_context_set_param(i915, &p);
+}
+
+static void set_load_balancer(int i915, uint32_t ctx,
+			      const struct class_instance *ci,
+			      unsigned int count)
+{
+	igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
+}
+
+static uint32_t load_balancer_create(int i915,
+				     const struct class_instance *ci,
+				     unsigned int count)
+{
+	uint32_t ctx;
+
+	ctx = gem_queue_create(i915);
+	set_load_balancer(i915, ctx, ci, count);
+
+	return ctx;
+}
+
+static void kick_kthreads(int period_us)
+{
+	sched_yield();
+	usleep(period_us);
+}
+
+static double measure_load(int pmu, int period_us)
+{
+	uint64_t data[2];
+	uint64_t d_t, d_v;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v = -data[0];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	d_v += data[0];
+	d_t += data[1];
+
+	return d_v / (double)d_t;
+}
+
+static double measure_min_load(int pmu, unsigned int num, int period_us)
+{
+	uint64_t data[2 + num];
+	uint64_t d_t, d_v[num];
+	uint64_t min = -1, max = 0;
+
+	kick_kthreads(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+	for (unsigned int n = 0; n < num; n++)
+		d_v[n] = -data[2 + n];
+	d_t = -data[1];
+
+	usleep(period_us);
+
+	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
+
+	d_t += data[1];
+	for (unsigned int n = 0; n < num; n++) {
+		d_v[n] += data[2 + n];
+		igt_debug("engine[%d]: %.1f%%\n",
+			  n, d_v[n] / (double)d_t * 100);
+		if (d_v[n] < min)
+			min = d_v[n];
+		if (d_v[n] > max)
+			max = d_v[n];
+	}
+
+	igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
+		  d_t, min / (double)d_t * 100,  max / (double)d_t * 100);
+
+	return min / (double)d_t;
+}
+
+static void check_individual_engine(int i915,
+				    uint32_t ctx,
+				    const struct class_instance *ci,
+				    int idx)
+{
+	igt_spin_t *spin;
+	double load;
+	int pmu;
+
+	pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class,
+						  ci[idx].instance));
+
+	spin = igt_spin_batch_new(i915, .ctx = ctx, .engine = idx + 1);
+	load = measure_load(pmu, 10000);
+	igt_spin_batch_free(i915, spin);
+
+	close(pmu);
+
+	igt_assert_f(load > 0.90,
+		     "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
+		     idx, ci[idx].class, ci[idx].instance, load*100);
+}
+
+static void individual(int i915)
+{
+	uint32_t ctx;
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINE allows us to index into the user
+	 * supplied array from gem_execbuf(). Our check is to build the
+	 * ctx->engine[] with various different engine classes, feed in
+	 * a spinner and then ask pmu to confirm it the expected engine
+	 * was busy.
+	 */
+
+	ctx = gem_queue_create(i915);
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct class_instance *ci;
+		unsigned int count;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		for (int pass = 0; pass < count; pass++) { /* approx. count! */
+			igt_permute_array(ci, count, igt_exchange_int64);
+			set_load_balancer(i915, ctx, ci, count);
+			for (unsigned int n = 0; n < count; n++)
+				check_individual_engine(i915, ctx, ci, n);
+		}
+
+		free(ci);
+	}
+
+	gem_context_destroy(i915, ctx);
+}
+
+static int add_pmu(int pmu, const struct class_instance *ci)
+{
+	return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->class,
+							 ci->instance),
+				    pmu);
+}
+
+static uint32_t batch_create(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, 4096);
+	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static void full(int i915, unsigned int flags)
+#define PULSE 0x1
+#define LATE 0x2
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+
+	if (flags & LATE)
+		igt_require_sw_sync();
+
+	/*
+	 * I915_CONTEXT_PARAM_ENGINE changes the meaning of I915_EXEC_DEFAULT
+	 * to provide an automatic selection from the ctx->engine[]. It
+	 * employs load-balancing to evenly distribute the workload the
+	 * array. If we submit N spinners, we expect them to be simultaneously
+	 * running across N engines and use PMU to confirm that the entire
+	 * set of engines are busy.
+	 *
+	 * We complicate matters by interpersing shortlived tasks to challenge
+	 * the kernel to search for space in which to insert new batches.
+	 */
+
+
+	for (int mask = 0; mask < 32; mask++) {
+		struct class_instance *ci;
+		igt_spin_t *spin = NULL;
+		unsigned int count;
+		IGT_CORK_FENCE(cork);
+		double load;
+		int fence = -1;
+		int *pmu;
+
+		ci = list_engines(i915, 1u << mask, &count);
+		if (!ci)
+			continue;
+
+		igt_debug("Found %d engines of class %d\n", count, mask);
+
+		pmu = malloc(sizeof(*pmu) * count);
+		igt_assert(pmu);
+
+		if (flags & LATE)
+			fence = igt_cork_plug(&cork, i915);
+
+		pmu[0] = -1;
+		for (unsigned int n = 0; n < count; n++) {
+			uint32_t ctx;
+
+			pmu[n] = add_pmu(pmu[0], &ci[n]);
+
+			if (flags & PULSE) {
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&batch),
+					.buffer_count = 1,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+
+				gem_execbuf(i915, &eb);
+			}
+
+			/*
+			 * Each spinner needs to be one a new timeline,
+			 * otherwise they will just sit in the single queue
+			 * and not run concurrently.
+			 */
+			ctx = load_balancer_create(i915, ci, count);
+
+			if (spin == NULL) {
+				spin = __igt_spin_batch_new(i915, ctx, 0, 0);
+			} else {
+				struct drm_i915_gem_exec_object2 obj = {
+					.handle = spin->handle,
+				};
+				struct drm_i915_gem_execbuffer2 eb = {
+					.buffers_ptr = to_user_pointer(&obj),
+					.buffer_count = 1,
+					.rsvd1 = ctx,
+					.rsvd2 = fence,
+					.flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
+				};
+
+				gem_execbuf(i915, &eb);
+			}
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		if (flags & LATE) {
+			igt_cork_unplug(&cork);
+			close(fence);
+		}
+
+		load = measure_min_load(pmu[0], count, 10000);
+		igt_spin_batch_free(i915, spin);
+
+		close(pmu[0]);
+		free(pmu);
+
+		free(ci);
+
+		igt_assert_f(load > 0.90,
+			     "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
+			     count, mask, load*100);
+	}
+
+	gem_close(i915, batch.handle);
+}
+
+static bool has_context_engines(int i915)
+{
+	struct drm_i915_gem_context_param p = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+	};
+
+	return __gem_context_set_param(i915, &p) == 0;
+}
+
+static bool has_load_balancer(int i915)
+{
+	struct class_instance ci = {};
+	uint32_t ctx;
+	int err;
+
+	ctx = gem_queue_create(i915);
+	err = __set_load_balancer(i915, ctx, &ci, 1);
+	gem_context_destroy(i915, ctx);
+
+	return err == 0;
+}
+
+igt_main
+{
+	int i915 = -1;
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
+
+		gem_require_contexts(i915);
+		igt_require(has_context_engines(i915));
+		igt_require(has_load_balancer(i915));
+
+		igt_fork_hang_detector(i915);
+	}
+
+	igt_subtest("individual")
+		individual(i915);
+
+	igt_subtest_group {
+		static const struct {
+			const char *name;
+			unsigned int flags;
+		} phases[] = {
+			{ "", 0 },
+			{ "-pulse", PULSE },
+			{ "-late", LATE },
+			{ "-late-pulse", PULSE | LATE },
+			{ }
+		};
+		for (typeof(*phases) *p = phases; p->name; p++)
+			igt_subtest_f("full%s", p->name)
+				full(i915, p->flags);
+	}
+
+	igt_fixture {
+		igt_stop_hang_detector();
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index b6da4f479..420d626fb 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -256,6 +256,13 @@ test_executables += executable('gem_eio', 'gem_eio.c',
 	   install : true)
 test_progs += 'gem_eio'
 
+test_executables += executable('gem_exec_balancer', 'gem_exec_balancer.c',
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_progs += 'gem_exec_balancer'
+
 test_executables += executable('gem_mocs_settings', 'gem_mocs_settings.c',
 	   dependencies : test_deps + [ lib_igt_perf ],
 	   install_dir : libexecdir,
-- 
2.19.0

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

  parent reply	other threads:[~2018-09-20 10:26 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-20 10:26 [PATCH i-g-t 1/5] igt: Exercise creating context with shared GTT Chris Wilson
2018-09-20 10:26 ` [Intel-gfx] " Chris Wilson
2018-09-20 10:26 ` [PATCH i-g-t 2/5] igt/gem_ctx_switch: Exercise queues Chris Wilson
2018-09-20 10:26   ` [igt-dev] " Chris Wilson
2018-09-20 10:26 ` [PATCH i-g-t 3/5] igt/gem_exec_whisper: Fork all-engine tests one-per-engine Chris Wilson
2018-09-20 10:26   ` [igt-dev] " Chris Wilson
2018-09-20 10:26 ` [PATCH i-g-t 4/5] igt: Add gem_ctx_engines Chris Wilson
2018-09-20 10:26   ` [Intel-gfx] " Chris Wilson
2018-09-20 10:26 ` Chris Wilson [this message]
2018-09-20 10:26   ` [igt-dev] [PATCH i-g-t 5/5] igt: Add gem_exec_balancer Chris Wilson
2018-09-20 12:01 ` [igt-dev] ✓ Fi.CI.BAT: success for series starting with [i-g-t,1/5] igt: Exercise creating context with shared GTT Patchwork
2018-09-20 13:46 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2018-06-27 14:46 [PATCH i-g-t 1/5] " Chris Wilson
2018-06-27 14:46 ` [PATCH i-g-t 5/5] igt: Add gem_exec_balancer Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180920102612.15291-5-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.