All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-01 19:08 ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-01 19:08 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 224 +++++++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..0ec21bf54 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,227 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915,
+			const struct intel_execution_engine2 *e,
+			uint32_t handle,
+			uint64_t addr,
+			uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+timed_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	async_delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       unsigned long *out)
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct timespec tv = {};
+	unsigned long count = 0;
+
+	igt_nsec_elapsed(&tv);
+	igt_until_timeout(timeout) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.flags = e->flags,
+		};
+
+		for (int n = 0; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		gem_sync(i915, prev.handle);
+		igt_swap(prev, next);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	*out = igt_nsec_elapsed(&tv) / count;
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int nchild = n - 1; /* odd for easy medians */
+
+		memset(result, 0, nchild * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+		igt_waitchildren();
+
+		qsort(result, nchild, sizeof(*result), ul_cmp);
+		igt_info("%d clients, range: [%lu, %lu], median: %lu\n",
+			 nchild, result[0], result[nchild-1], result[nchild/2]);
+
+		igt_assert(4 * result[0] > 3 * result[nchild-1]);
+		igt_assert(3 * result[0] < 4 * result[nchild-1]);
+
+		igt_assert(4 * result[nchild/2] > 3 * frame_ns);
+		igt_assert(3 * result[nchild/2] < 4 * frame_ns);
+	}
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +2810,9 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fairness", fd, e)
+			fairness(fd, e, 3);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-01 19:08 ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-01 19:08 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 224 +++++++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..0ec21bf54 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,227 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915,
+			const struct intel_execution_engine2 *e,
+			uint32_t handle,
+			uint64_t addr,
+			uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+timed_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	async_delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       unsigned long *out)
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct timespec tv = {};
+	unsigned long count = 0;
+
+	igt_nsec_elapsed(&tv);
+	igt_until_timeout(timeout) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.flags = e->flags,
+		};
+
+		for (int n = 0; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		gem_sync(i915, prev.handle);
+		igt_swap(prev, next);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	*out = igt_nsec_elapsed(&tv) / count;
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int nchild = n - 1; /* odd for easy medians */
+
+		memset(result, 0, nchild * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+		igt_waitchildren();
+
+		qsort(result, nchild, sizeof(*result), ul_cmp);
+		igt_info("%d clients, range: [%lu, %lu], median: %lu\n",
+			 nchild, result[0], result[nchild-1], result[nchild/2]);
+
+		igt_assert(4 * result[0] > 3 * result[nchild-1]);
+		igt_assert(3 * result[0] < 4 * result[nchild-1]);
+
+		igt_assert(4 * result[nchild/2] > 3 * frame_ns);
+		igt_assert(3 * result[nchild/2] < 4 * frame_ns);
+	}
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +2810,9 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fairness", fd, e)
+			fairness(fd, e, 3);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
  (?)
@ 2020-06-01 19:37 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2020-06-01 19:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_schedule: Try to spot unfairness
URL   : https://patchwork.freedesktop.org/series/77887/
State : success

== Summary ==

CI Bug Log - changes from IGT_5690 -> IGTPW_4631
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_4631:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@gem_exec_parallel@engines@contexts}:
    - {fi-ehl-1}:         [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/fi-ehl-1/igt@gem_exec_parallel@engines@contexts.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/fi-ehl-1/igt@gem_exec_parallel@engines@contexts.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).



Participating hosts (51 -> 45)
------------------------------

  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5690 -> IGTPW_4631

  CI-20190529: 20190529
  CI_DRM_8566: fed6b89dd6f3c4e2e909805815c5728b1fd65ce5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4631: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/index.html
  IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_exec_schedule@fairness

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
@ 2020-06-01 19:53   ` Chris Wilson
  -1 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-01 19:53 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 245 +++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..5d91e94a3 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,246 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915,
+			const struct intel_execution_engine2 *e,
+			uint32_t handle,
+			uint64_t addr,
+			uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+timed_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	async_delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       unsigned int flags,
+		       unsigned long *out)
+#define F_PACING 0x1
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct timespec tv = {};
+	unsigned long count = 0;
+	int p_fence = -1, n_fence = -1;
+
+	igt_nsec_elapsed(&tv);
+	igt_until_timeout(timeout) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.flags = e->flags,
+		};
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+		for (int n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACING && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	*out = igt_nsec_elapsed(&tv) / count;
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int nchild = n - 1; /* odd for easy medians */
+
+		memset(result, 0, nchild * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, flags, &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+		igt_waitchildren();
+
+		qsort(result, nchild, sizeof(*result), ul_cmp);
+		igt_info("%d clients, range: [%lu, %lu], median: %lu\n",
+			 nchild, result[0], result[nchild-1], result[nchild/2]);
+
+		igt_assert(4 * result[0] > 3 * result[nchild-1]);
+		igt_assert(3 * result[0] < 4 * result[nchild-1]);
+
+		igt_assert(4 * result[nchild/2] > 3 * frame_ns);
+		igt_assert(3 * result[nchild/2] < 4 * frame_ns);
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +2829,11 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fairness", fd, e)
+			fairness(fd, e, 3, F_PACING);
+		test_each_engine_store("unfairness", fd, e)
+			fairness(fd, e, 3, 0);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-01 19:53   ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-01 19:53 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 245 +++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..5d91e94a3 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,246 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915,
+			const struct intel_execution_engine2 *e,
+			uint32_t handle,
+			uint64_t addr,
+			uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+timed_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	async_delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       unsigned int flags,
+		       unsigned long *out)
+#define F_PACING 0x1
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct timespec tv = {};
+	unsigned long count = 0;
+	int p_fence = -1, n_fence = -1;
+
+	igt_nsec_elapsed(&tv);
+	igt_until_timeout(timeout) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.flags = e->flags,
+		};
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+		for (int n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACING && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	*out = igt_nsec_elapsed(&tv) / count;
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int nchild = n - 1; /* odd for easy medians */
+
+		memset(result, 0, nchild * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, flags, &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+		igt_waitchildren();
+
+		qsort(result, nchild, sizeof(*result), ul_cmp);
+		igt_info("%d clients, range: [%lu, %lu], median: %lu\n",
+			 nchild, result[0], result[nchild-1], result[nchild/2]);
+
+		igt_assert(4 * result[0] > 3 * result[nchild-1]);
+		igt_assert(3 * result[0] < 4 * result[nchild-1]);
+
+		igt_assert(4 * result[nchild/2] > 3 * frame_ns);
+		igt_assert(3 * result[nchild/2] < 4 * frame_ns);
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +2829,11 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fairness", fd, e)
+			fairness(fd, e, 3, F_PACING);
+		test_each_engine_store("unfairness", fd, e)
+			fairness(fd, e, 3, 0);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev2)
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
                   ` (2 preceding siblings ...)
  (?)
@ 2020-06-01 20:24 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2020-06-01 20:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_schedule: Try to spot unfairness (rev2)
URL   : https://patchwork.freedesktop.org/series/77887/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8567 -> IGTPW_4632
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/index.html


Changes
-------

  No changes found


Participating hosts (49 -> 45)
------------------------------

  Additional (2): fi-skl-lmem fi-cfl-8109u 
  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5690 -> IGTPW_4632

  CI-20190529: 20190529
  CI_DRM_8567: d36c7a9807541df70739a5917cbbab42fdf66a29 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4632: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/index.html
  IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_exec_schedule@fairness
+igt@gem_exec_schedule@unfairness

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
@ 2020-06-01 21:17   ` Chris Wilson
  -1 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-01 21:17 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 253 +++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..d58d926b1 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,254 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915,
+			const struct intel_execution_engine2 *e,
+			uint32_t handle,
+			uint64_t addr,
+			uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+timed_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	async_delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       unsigned int flags,
+		       unsigned long *out)
+#define F_PACING 0x1
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct timespec tv = {};
+	unsigned long count = 0;
+	int p_fence = -1, n_fence = -1;
+
+	igt_nsec_elapsed(&tv);
+	igt_until_timeout(timeout) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.flags = e->flags,
+		};
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+		for (int n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACING && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	*out = igt_nsec_elapsed(&tv) / count;
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		const int nchild = n - 1; /* odd for easy medians */
+		const int iqr_lo = nchild / 4;
+		const int iqr_hi = (3 * nchild + 3) / 4 - 1;
+		unsigned long iqr;
+
+		memset(result, 0, nchild * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, flags, &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+		igt_waitchildren();
+
+		qsort(result, nchild, sizeof(*result), ul_cmp);
+		igt_info("%d clients, range: [%lu, %lu], iqr: [%lu, %lu], median: %lu\n",
+			 nchild,
+			 result[0], result[nchild - 1],
+			 result[iqr_lo], result[iqr_hi],
+			 result[nchild / 2]);
+
+		/* Median within 10% of target */
+		igt_assert(10 * result[nchild / 2] > 9 * frame_ns &&
+			   9 * result[nchild / 2] < 10 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		iqr = result[iqr_hi] - result[iqr_lo];
+		igt_assert(3 * iqr < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +2837,11 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fairness", fd, e)
+			fairness(fd, e, 3, F_PACING);
+		test_each_engine_store("unfairness", fd, e)
+			fairness(fd, e, 3, 0);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-01 21:17   ` Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-01 21:17 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 253 +++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..d58d926b1 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,254 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915,
+			const struct intel_execution_engine2 *e,
+			uint32_t handle,
+			uint64_t addr,
+			uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+timed_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	async_delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       unsigned int flags,
+		       unsigned long *out)
+#define F_PACING 0x1
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct timespec tv = {};
+	unsigned long count = 0;
+	int p_fence = -1, n_fence = -1;
+
+	igt_nsec_elapsed(&tv);
+	igt_until_timeout(timeout) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.flags = e->flags,
+		};
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+		for (int n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACING && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	*out = igt_nsec_elapsed(&tv) / count;
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		const int nchild = n - 1; /* odd for easy medians */
+		const int iqr_lo = nchild / 4;
+		const int iqr_hi = (3 * nchild + 3) / 4 - 1;
+		unsigned long iqr;
+
+		memset(result, 0, nchild * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, flags, &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+		igt_waitchildren();
+
+		qsort(result, nchild, sizeof(*result), ul_cmp);
+		igt_info("%d clients, range: [%lu, %lu], iqr: [%lu, %lu], median: %lu\n",
+			 nchild,
+			 result[0], result[nchild - 1],
+			 result[iqr_lo], result[iqr_hi],
+			 result[nchild / 2]);
+
+		/* Median within 10% of target */
+		igt_assert(10 * result[nchild / 2] > 9 * frame_ns &&
+			   9 * result[nchild / 2] < 10 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		iqr = result[iqr_hi] - result[iqr_lo];
+		igt_assert(3 * iqr < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +2837,11 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fairness", fd, e)
+			fairness(fd, e, 3, F_PACING);
+		test_each_engine_store("unfairness", fd, e)
+			fairness(fd, e, 3, 0);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev3)
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
                   ` (4 preceding siblings ...)
  (?)
@ 2020-06-01 22:02 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2020-06-01 22:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_schedule: Try to spot unfairness (rev3)
URL   : https://patchwork.freedesktop.org/series/77887/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8567 -> IGTPW_4633
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/index.html


Changes
-------

  No changes found


Participating hosts (49 -> 44)
------------------------------

  Additional (2): fi-skl-lmem fi-cfl-8109u 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-kbl-7560u fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5690 -> IGTPW_4633

  CI-20190529: 20190529
  CI_DRM_8567: d36c7a9807541df70739a5917cbbab42fdf66a29 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4633: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/index.html
  IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_exec_schedule@fairness
+igt@gem_exec_schedule@unfairness

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for i915/gem_exec_schedule: Try to spot unfairness
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
                   ` (5 preceding siblings ...)
  (?)
@ 2020-06-02  2:24 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2020-06-02  2:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_schedule: Try to spot unfairness
URL   : https://patchwork.freedesktop.org/series/77887/
State : success

== Summary ==

CI Bug Log - changes from IGT_5690_full -> IGTPW_4631_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_4631_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@gem_exec_schedule@fairness@bcs0} (NEW):
    - shard-glk:          NOTRUN -> [FAIL][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk4/igt@gem_exec_schedule@fairness@bcs0.html
    - shard-iclb:         NOTRUN -> [INCOMPLETE][2]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb8/igt@gem_exec_schedule@fairness@bcs0.html
    - shard-apl:          NOTRUN -> [FAIL][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl7/igt@gem_exec_schedule@fairness@bcs0.html
    - shard-kbl:          NOTRUN -> [FAIL][4]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl3/igt@gem_exec_schedule@fairness@bcs0.html
    - shard-tglb:         NOTRUN -> [INCOMPLETE][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-tglb8/igt@gem_exec_schedule@fairness@bcs0.html

  * {igt@gem_exec_schedule@fairness@rcs0} (NEW):
    - shard-iclb:         NOTRUN -> [FAIL][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb8/igt@gem_exec_schedule@fairness@rcs0.html

  
New tests
---------

  New tests have been introduced between IGT_5690_full and IGTPW_4631_full:

### New IGT tests (6) ###

  * igt@gem_exec_schedule@fairness:
    - Statuses : 2 skip(s)
    - Exec time: [0.0] s

  * igt@gem_exec_schedule@fairness@bcs0:
    - Statuses : 3 fail(s) 2 incomplete(s)
    - Exec time: [0.0, 3.37] s

  * igt@gem_exec_schedule@fairness@rcs0:
    - Statuses : 1 fail(s) 4 pass(s)
    - Exec time: [3.34, 13.39] s

  * igt@gem_exec_schedule@fairness@vcs0:
    - Statuses : 3 pass(s)
    - Exec time: [13.20, 13.36] s

  * igt@gem_exec_schedule@fairness@vcs1:
    - Statuses : 1 pass(s)
    - Exec time: [13.18] s

  * igt@gem_exec_schedule@fairness@vecs0:
    - Statuses : 3 pass(s)
    - Exec time: [13.18, 13.34] s

  

Known issues
------------

  Here are the changes found in IGTPW_4631_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@gem-execbuf:
    - shard-hsw:          [PASS][7] -> [INCOMPLETE][8] ([i915#151] / [i915#61])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-hsw1/igt@i915_pm_rpm@gem-execbuf.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-hsw1/igt@i915_pm_rpm@gem-execbuf.html

  * igt@kms_cursor_crc@pipe-a-cursor-256x256-offscreen:
    - shard-kbl:          [PASS][9] -> [FAIL][10] ([i915#54] / [i915#93] / [i915#95])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-256x256-offscreen.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-256x256-offscreen.html

  * igt@kms_cursor_crc@pipe-a-cursor-256x256-sliding:
    - shard-apl:          [PASS][11] -> [TIMEOUT][12] ([i915#1635]) +1 similar issue
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl2/igt@kms_cursor_crc@pipe-a-cursor-256x256-sliding.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl4/igt@kms_cursor_crc@pipe-a-cursor-256x256-sliding.html
    - shard-glk:          [PASS][13] -> [TIMEOUT][14] ([i915#1958]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk4/igt@kms_cursor_crc@pipe-a-cursor-256x256-sliding.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk1/igt@kms_cursor_crc@pipe-a-cursor-256x256-sliding.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled:
    - shard-kbl:          [PASS][15] -> [FAIL][16] ([i915#177] / [i915#52] / [i915#54] / [i915#93] / [i915#95]) +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl6/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl3/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-ytiled:
    - shard-apl:          [PASS][17] -> [FAIL][18] ([i915#52] / [i915#54] / [i915#95]) +2 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl8/igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-ytiled.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-wc-ytiled.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-apl:          [PASS][19] -> [DMESG-WARN][20] ([i915#180] / [i915#95])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl2/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl1/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-kbl:          [PASS][21] -> [DMESG-WARN][22] ([i915#180]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][23] -> [SKIP][24] ([fdo#109441])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb8/igt@kms_psr@psr2_sprite_plane_move.html

  
#### Possible fixes ####

  * {igt@gem_exec_reloc@basic-concurrent16}:
    - shard-snb:          [FAIL][25] ([i915#1930]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-snb6/igt@gem_exec_reloc@basic-concurrent16.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-snb4/igt@gem_exec_reloc@basic-concurrent16.html

  * igt@gem_exec_reloc@basic-cpu-read:
    - shard-glk:          [TIMEOUT][27] ([i915#1958]) -> [PASS][28] +2 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk9/igt@gem_exec_reloc@basic-cpu-read.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk5/igt@gem_exec_reloc@basic-cpu-read.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [DMESG-WARN][29] ([i915#1436] / [i915#716]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk7/igt@gen9_exec_parse@allowed-all.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk4/igt@gen9_exec_parse@allowed-all.html
    - shard-kbl:          [DMESG-WARN][31] ([i915#1436] / [i915#716]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl6/igt@gen9_exec_parse@allowed-all.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl7/igt@gen9_exec_parse@allowed-all.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-180:
    - shard-glk:          [FAIL][33] ([i915#1119] / [i915#118] / [i915#95]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk8/igt@kms_big_fb@y-tiled-64bpp-rotate-180.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk2/igt@kms_big_fb@y-tiled-64bpp-rotate-180.html

  * igt@kms_color@pipe-b-degamma:
    - shard-hsw:          [DMESG-WARN][35] ([i915#1927]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-hsw1/igt@kms_color@pipe-b-degamma.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-hsw4/igt@kms_color@pipe-b-degamma.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen:
    - shard-kbl:          [FAIL][37] ([i915#54] / [i915#93] / [i915#95]) -> [PASS][38] +3 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html

  * igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge:
    - shard-apl:          [FAIL][39] ([i915#70] / [i915#95]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl6/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl7/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
    - shard-kbl:          [FAIL][41] ([i915#70] / [i915#93] / [i915#95]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl6/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl3/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled:
    - shard-kbl:          [FAIL][43] ([i915#177] / [i915#52] / [i915#54] / [i915#93] / [i915#95]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl2/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl2/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html

  * igt@kms_draw_crc@fill-fb:
    - shard-apl:          [FAIL][45] ([i915#95]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl7/igt@kms_draw_crc@fill-fb.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl7/igt@kms_draw_crc@fill-fb.html

  * igt@kms_fbcon_fbt@fbc:
    - shard-kbl:          [FAIL][47] ([i915#64] / [i915#93] / [i915#95]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl2/igt@kms_fbcon_fbt@fbc.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl3/igt@kms_fbcon_fbt@fbc.html
    - shard-apl:          [FAIL][49] ([i915#1525] / [i915#95]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl4/igt@kms_fbcon_fbt@fbc.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl7/igt@kms_fbcon_fbt@fbc.html

  * {igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a1-hdmi-a2}:
    - shard-glk:          [FAIL][51] ([i915#79]) -> [PASS][52]
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a1-hdmi-a2.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk9/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible@ab-hdmi-a1-hdmi-a2.html

  * {igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a1-hdmi-a2}:
    - shard-glk:          [FAIL][53] ([i915#46]) -> [PASS][54]
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a1-hdmi-a2.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk9/igt@kms_flip@2x-flip-vs-expired-vblank@ab-hdmi-a1-hdmi-a2.html

  * {igt@kms_flip@flip-vs-suspend-interruptible@a-dp1}:
    - shard-kbl:          [DMESG-WARN][55] ([i915#180]) -> [PASS][56] +4 similar issues
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-kbl6/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-kbl6/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_panel_fitting@atomic-fastset:
    - shard-tglb:         [FAIL][57] ([i915#83]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-tglb1/igt@kms_panel_fitting@atomic-fastset.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-tglb7/igt@kms_panel_fitting@atomic-fastset.html
    - shard-iclb:         [FAIL][59] ([i915#83]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-iclb3/igt@kms_panel_fitting@atomic-fastset.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb7/igt@kms_panel_fitting@atomic-fastset.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-apl:          [DMESG-WARN][61] ([i915#180]) -> [PASS][62] +3 similar issues
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_psr@psr2_basic:
    - shard-iclb:         [SKIP][63] ([fdo#109441]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-iclb1/igt@kms_psr@psr2_basic.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb2/igt@kms_psr@psr2_basic.html

  * igt@kms_vblank@pipe-c-wait-idle-hang:
    - shard-apl:          [TIMEOUT][65] ([i915#1635]) -> [PASS][66] +2 similar issues
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl6/igt@kms_vblank@pipe-c-wait-idle-hang.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl6/igt@kms_vblank@pipe-c-wait-idle-hang.html

  * {igt@perf@blocking-parameterized}:
    - shard-hsw:          [FAIL][67] ([i915#1542]) -> [PASS][68]
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-hsw8/igt@perf@blocking-parameterized.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-hsw1/igt@perf@blocking-parameterized.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][69] ([i915#1899]) -> [SKIP][70] ([i915#468])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-tglb5/igt@i915_pm_dc@dc6-psr.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-tglb2/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-iclb:         [WARN][71] ([i915#1515]) -> [FAIL][72] ([i915#1515])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-iclb3/igt@i915_pm_rc6_residency@rc6-idle.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb2/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_content_protection@atomic:
    - shard-apl:          [FAIL][73] ([fdo#110321] / [fdo#110336]) -> [TIMEOUT][74] ([i915#1319] / [i915#1635])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl1/igt@kms_content_protection@atomic.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl1/igt@kms_content_protection@atomic.html

  * igt@kms_content_protection@atomic-dpms:
    - shard-apl:          [TIMEOUT][75] ([i915#1319] / [i915#1635]) -> [FAIL][76] ([fdo#110321] / [fdo#110336])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl8/igt@kms_content_protection@atomic-dpms.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl1/igt@kms_content_protection@atomic-dpms.html

  * igt@kms_content_protection@srm:
    - shard-apl:          [TIMEOUT][77] ([i915#1319] / [i915#1635]) -> [FAIL][78] ([fdo#110321])
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl6/igt@kms_content_protection@srm.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl7/igt@kms_content_protection@srm.html

  * igt@kms_cursor_legacy@cursora-vs-flipb-toggle:
    - shard-glk:          [DMESG-WARN][79] ([i915#1926]) -> [DMESG-FAIL][80] ([i915#1925] / [i915#1926])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk5/igt@kms_cursor_legacy@cursora-vs-flipb-toggle.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk4/igt@kms_cursor_legacy@cursora-vs-flipb-toggle.html

  * igt@kms_cursor_legacy@cursorb-vs-flipa-toggle:
    - shard-glk:          [DMESG-FAIL][81] ([i915#1925] / [i915#1926]) -> [DMESG-WARN][82] ([i915#1926])
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk7/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk5/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-basic:
    - shard-apl:          [FAIL][83] ([fdo#108145] / [i915#265]) -> [FAIL][84] ([fdo#108145] / [i915#265] / [i915#95])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl1/igt@kms_plane_alpha_blend@pipe-c-alpha-basic.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl7/igt@kms_plane_alpha_blend@pipe-c-alpha-basic.html

  * igt@kms_plane_lowres@pipe-d-tiling-none:
    - shard-apl:          [SKIP][85] ([fdo#109271]) -> [TIMEOUT][86] ([i915#1635])
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-apl7/igt@kms_plane_lowres@pipe-d-tiling-none.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-apl4/igt@kms_plane_lowres@pipe-d-tiling-none.html
    - shard-glk:          [SKIP][87] ([fdo#109271]) -> [TIMEOUT][88] ([i915#1958])
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-glk8/igt@kms_plane_lowres@pipe-d-tiling-none.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-glk1/igt@kms_plane_lowres@pipe-d-tiling-none.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [SKIP][89] ([fdo#109642] / [fdo#111068]) -> [FAIL][90] ([i915#608])
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/IGT_5690/shard-iclb1/igt@kms_psr2_su@page_flip.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/shard-iclb2/igt@kms_psr2_su@page_flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110321]: https://bugs.freedesktop.org/show_bug.cgi?id=110321
  [fdo#110336]: https://bugs.freedesktop.org/show_bug.cgi?id=110336
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [i915#1119]: https://gitlab.freedesktop.org/drm/intel/issues/1119
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#151]: https://gitlab.freedesktop.org/drm/intel/issues/151
  [i915#1515]: https://gitlab.freedesktop.org/drm/intel/issues/1515
  [i915#1525]: https://gitlab.freedesktop.org/drm/intel/issues/1525
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#177]: https://gitlab.freedesktop.org/drm/intel/issues/177
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1899]: https://gitlab.freedesktop.org/drm/intel/issues/1899
  [i915#1925]: https://gitlab.freedesktop.org/drm/intel/issues/1925
  [i915#1926]: https://gitlab.freedesktop.org/drm/intel/issues/1926
  [i915#1927]: https://gitlab.freedesktop.org/drm/intel/issues/1927
  [i915#1930]: https://gitlab.freedesktop.org/drm/intel/issues/1930
  [i915#1958]: https://gitlab.freedesktop.org/drm/intel/issues/1958
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#46]: https://gitlab.freedesktop.org/drm/intel/issues/46
  [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#608]: https://gitlab.freedesktop.org/drm/intel/issues/608
  [i915#61]: https://gitlab.freedesktop.org/drm/intel/issues/61
  [i915#64]: https://gitlab.freedesktop.org/drm/intel/issues/64
  [i915#70]: https://gitlab.freedesktop.org/drm/intel/issues/70
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#83]: https://gitlab.freedesktop.org/drm/intel/issues/83
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (8 -> 8)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5690 -> IGTPW_4631

  CI-20190529: 20190529
  CI_DRM_8566: fed6b89dd6f3c4e2e909805815c5728b1fd65ce5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4631: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/index.html
  IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4631/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for i915/gem_exec_schedule: Try to spot unfairness (rev2)
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
                   ` (6 preceding siblings ...)
  (?)
@ 2020-06-02  3:59 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2020-06-02  3:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_schedule: Try to spot unfairness (rev2)
URL   : https://patchwork.freedesktop.org/series/77887/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8567_full -> IGTPW_4632_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_4632_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_4632_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_4632_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@gem_exec_schedule@fairness@bcs0} (NEW):
    - shard-iclb:         NOTRUN -> [INCOMPLETE][1] +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb8/igt@gem_exec_schedule@fairness@bcs0.html

  * {igt@gem_exec_schedule@fairness@rcs0} (NEW):
    - shard-iclb:         NOTRUN -> [FAIL][2] +1 similar issue
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb8/igt@gem_exec_schedule@fairness@rcs0.html
    - shard-glk:          NOTRUN -> [FAIL][3]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-glk5/igt@gem_exec_schedule@fairness@rcs0.html

  * {igt@gem_exec_schedule@unfairness@bcs0} (NEW):
    - shard-tglb:         NOTRUN -> [INCOMPLETE][4] +1 similar issue
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb8/igt@gem_exec_schedule@unfairness@bcs0.html

  * {igt@gem_exec_schedule@unfairness@rcs0} (NEW):
    - shard-kbl:          NOTRUN -> [FAIL][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl6/igt@gem_exec_schedule@unfairness@rcs0.html

  * igt@i915_suspend@sysfs-reader:
    - shard-kbl:          [PASS][6] -> [INCOMPLETE][7]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl2/igt@i915_suspend@sysfs-reader.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl3/igt@i915_suspend@sysfs-reader.html

  
New tests
---------

  New tests have been introduced between CI_DRM_8567_full and IGTPW_4632_full:

### New IGT tests (11) ###

  * igt@gem_exec_schedule@fairness:
    - Statuses : 2 skip(s)
    - Exec time: [0.0] s

  * igt@gem_exec_schedule@fairness@bcs0:
    - Statuses : 2 incomplete(s) 2 pass(s)
    - Exec time: [0.0, 13.38] s

  * igt@gem_exec_schedule@fairness@rcs0:
    - Statuses : 2 fail(s) 2 pass(s)
    - Exec time: [3.38, 13.39] s

  * igt@gem_exec_schedule@fairness@vcs0:
    - Statuses : 2 pass(s)
    - Exec time: [13.35, 13.38] s

  * igt@gem_exec_schedule@fairness@vecs0:
    - Statuses : 2 pass(s)
    - Exec time: [13.35] s

  * igt@gem_exec_schedule@unfairness:
    - Statuses : 2 skip(s)
    - Exec time: [0.0] s

  * igt@gem_exec_schedule@unfairness@bcs0:
    - Statuses : 2 incomplete(s) 1 pass(s)
    - Exec time: [0.0, 15.36] s

  * igt@gem_exec_schedule@unfairness@rcs0:
    - Statuses : 2 fail(s) 1 pass(s)
    - Exec time: [3.78, 14.49] s

  * igt@gem_exec_schedule@unfairness@vcs0:
    - Statuses : 1 pass(s)
    - Exec time: [15.55] s

  * igt@gem_exec_schedule@unfairness@vcs1:
    - Statuses : 1 pass(s)
    - Exec time: [15.43] s

  * igt@gem_exec_schedule@unfairness@vecs0:
    - Statuses : 1 pass(s)
    - Exec time: [15.44] s

  

Known issues
------------

  Here are the changes found in IGTPW_4632_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_mmap_offset@open-flood:
    - shard-glk:          [PASS][8] -> [TIMEOUT][9] ([i915#1958]) +1 similar issue
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk9/igt@gem_mmap_offset@open-flood.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-glk8/igt@gem_mmap_offset@open-flood.html
    - shard-apl:          [PASS][10] -> [TIMEOUT][11] ([i915#1635]) +2 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl2/igt@gem_mmap_offset@open-flood.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl4/igt@gem_mmap_offset@open-flood.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-apl:          [PASS][12] -> [DMESG-WARN][13] ([i915#1436] / [i915#716])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@gen9_exec_parse@allowed-all.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl2/igt@gen9_exec_parse@allowed-all.html

  * igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen:
    - shard-kbl:          [PASS][14] -> [FAIL][15] ([i915#54] / [i915#93] / [i915#95])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-128x128-onscreen.html

  * igt@kms_cursor_crc@pipe-c-cursor-128x42-onscreen:
    - shard-kbl:          [PASS][16] -> [FAIL][17] ([i915#54])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl1/igt@kms_cursor_crc@pipe-c-cursor-128x42-onscreen.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl3/igt@kms_cursor_crc@pipe-c-cursor-128x42-onscreen.html

  * igt@kms_cursor_crc@pipe-c-cursor-64x64-random:
    - shard-apl:          [PASS][18] -> [FAIL][19] ([i915#54]) +1 similar issue
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_cursor_crc@pipe-c-cursor-64x64-random.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl8/igt@kms_cursor_crc@pipe-c-cursor-64x64-random.html

  * igt@kms_cursor_edge_walk@pipe-a-64x64-right-edge:
    - shard-kbl:          [PASS][20] -> [FAIL][21] ([i915#70] / [i915#93] / [i915#95])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_cursor_edge_walk@pipe-a-64x64-right-edge.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl1/igt@kms_cursor_edge_walk@pipe-a-64x64-right-edge.html

  * igt@kms_cursor_legacy@flip-vs-cursor-crc-legacy:
    - shard-kbl:          [PASS][22] -> [FAIL][23] ([i915#1566] / [i915#93] / [i915#95])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_cursor_legacy@flip-vs-cursor-crc-legacy.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl3/igt@kms_cursor_legacy@flip-vs-cursor-crc-legacy.html

  * igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled:
    - shard-apl:          [PASS][24] -> [FAIL][25] ([i915#52] / [i915#54] / [i915#95])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl6/igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl4/igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled.html
    - shard-kbl:          [PASS][26] -> [FAIL][27] ([i915#177] / [i915#52] / [i915#54] / [i915#93] / [i915#95])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl6/igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl3/igt@kms_draw_crc@draw-method-xrgb8888-blt-untiled.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-kbl:          [PASS][28] -> [DMESG-WARN][29] ([i915#180])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl6/igt@kms_fbcon_fbt@fbc-suspend.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl6/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_hdmi_inject@inject-audio:
    - shard-tglb:         [PASS][30] -> [SKIP][31] ([i915#433])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb8/igt@kms_hdmi_inject@inject-audio.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb8/igt@kms_hdmi_inject@inject-audio.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-tglb:         [PASS][32] -> [INCOMPLETE][33] ([i915#1602] / [i915#456])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb7/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb8/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-apl:          [PASS][34] -> [DMESG-WARN][35] ([i915#180])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl7/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_plane_cursor@pipe-a-viewport-size-256:
    - shard-apl:          [PASS][36] -> [FAIL][37] ([i915#1559] / [i915#95])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl2/igt@kms_plane_cursor@pipe-a-viewport-size-256.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl6/igt@kms_plane_cursor@pipe-a-viewport-size-256.html
    - shard-kbl:          [PASS][38] -> [FAIL][39] ([i915#1559] / [i915#93] / [i915#95])
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_plane_cursor@pipe-a-viewport-size-256.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl7/igt@kms_plane_cursor@pipe-a-viewport-size-256.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [PASS][40] -> [SKIP][41] ([fdo#109441]) +1 similar issue
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb6/igt@kms_psr@psr2_primary_page_flip.html

  
#### Possible fixes ####

  * {igt@gem_ctx_isolation@preservation-s3@rcs0}:
    - shard-kbl:          [DMESG-WARN][42] ([i915#180]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl6/igt@gem_ctx_isolation@preservation-s3@rcs0.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl7/igt@gem_ctx_isolation@preservation-s3@rcs0.html

  * {igt@gem_exec_reloc@basic-concurrent16}:
    - shard-snb:          [FAIL][44] ([i915#1930]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-snb4/igt@gem_exec_reloc@basic-concurrent16.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-snb2/igt@gem_exec_reloc@basic-concurrent16.html

  * igt@gem_exec_reloc@basic-cpu-read:
    - shard-glk:          [TIMEOUT][46] ([i915#1958]) -> [PASS][47] +2 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk5/igt@gem_exec_reloc@basic-cpu-read.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-glk7/igt@gem_exec_reloc@basic-cpu-read.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen:
    - shard-kbl:          [FAIL][48] ([i915#54] / [i915#93] / [i915#95]) -> [PASS][49] +2 similar issues
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl4/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-apl:          [DMESG-WARN][50] ([i915#180]) -> [PASS][51]
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge:
    - shard-apl:          [FAIL][52] ([i915#70] / [i915#95]) -> [PASS][53]
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl7/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
    - shard-kbl:          [FAIL][54] ([i915#70] / [i915#93] / [i915#95]) -> [PASS][55]
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl4/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html

  * igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
    - shard-tglb:         [INCOMPLETE][56] -> [PASS][57]
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb7/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb1/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html

  * igt@kms_cursor_legacy@cursora-vs-flipb-toggle:
    - shard-glk:          [DMESG-FAIL][58] ([i915#1925] / [i915#1926]) -> [PASS][59]
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk1/igt@kms_cursor_legacy@cursora-vs-flipb-toggle.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-glk7/igt@kms_cursor_legacy@cursora-vs-flipb-toggle.html

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled:
    - shard-kbl:          [FAIL][60] ([i915#177] / [i915#52] / [i915#54] / [i915#93] / [i915#95]) -> [PASS][61]
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl3/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
    - shard-apl:          [FAIL][62] ([i915#52] / [i915#54] / [i915#95]) -> [PASS][63]
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl2/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html

  * igt@kms_draw_crc@fill-fb:
    - shard-kbl:          [FAIL][64] ([i915#52] / [i915#93] / [i915#95]) -> [PASS][65]
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl1/igt@kms_draw_crc@fill-fb.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl2/igt@kms_draw_crc@fill-fb.html
    - shard-apl:          [FAIL][66] ([i915#95]) -> [PASS][67]
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl8/igt@kms_draw_crc@fill-fb.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl7/igt@kms_draw_crc@fill-fb.html

  * igt@kms_fbcon_fbt@fbc:
    - shard-kbl:          [FAIL][68] ([i915#64] / [i915#93] / [i915#95]) -> [PASS][69]
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_fbcon_fbt@fbc.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl2/igt@kms_fbcon_fbt@fbc.html
    - shard-apl:          [FAIL][70] ([i915#1525] / [i915#95]) -> [PASS][71]
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_fbcon_fbt@fbc.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl8/igt@kms_fbcon_fbt@fbc.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-apl:          [DMESG-WARN][72] ([i915#180] / [i915#95]) -> [PASS][73]
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl2/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_panel_fitting@atomic-fastset:
    - shard-tglb:         [FAIL][74] ([i915#83]) -> [PASS][75]
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb6/igt@kms_panel_fitting@atomic-fastset.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb2/igt@kms_panel_fitting@atomic-fastset.html
    - shard-iclb:         [FAIL][76] ([i915#83]) -> [PASS][77]
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb4/igt@kms_panel_fitting@atomic-fastset.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb6/igt@kms_panel_fitting@atomic-fastset.html

  * igt@kms_psr@psr2_primary_render:
    - shard-iclb:         [SKIP][78] ([fdo#109441]) -> [PASS][79]
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb6/igt@kms_psr@psr2_primary_render.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb2/igt@kms_psr@psr2_primary_render.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [FAIL][80] ([i915#31]) -> [PASS][81]
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_setmode@basic.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl1/igt@kms_setmode@basic.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][82] ([i915#588]) -> [SKIP][83] ([i915#658])
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb4/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][84] ([i915#1899]) -> [FAIL][85] ([i915#454])
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb8/igt@i915_pm_dc@dc6-psr.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb4/igt@i915_pm_dc@dc6-psr.html
    - shard-tglb:         [FAIL][86] ([i915#1899]) -> [FAIL][87] ([i915#454])
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb8/igt@i915_pm_dc@dc6-psr.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb5/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_content_protection@atomic:
    - shard-apl:          [TIMEOUT][88] ([i915#1319] / [i915#1635]) -> [FAIL][89] ([fdo#110321] / [fdo#110336])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_content_protection@atomic.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl1/igt@kms_content_protection@atomic.html

  * igt@kms_content_protection@legacy:
    - shard-apl:          [TIMEOUT][90] ([i915#1319]) -> [TIMEOUT][91] ([i915#1319] / [i915#1635])
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl6/igt@kms_content_protection@legacy.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl2/igt@kms_content_protection@legacy.html

  * igt@kms_content_protection@lic:
    - shard-apl:          [TIMEOUT][92] ([i915#1319] / [i915#1635]) -> [FAIL][93] ([fdo#110321])
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl8/igt@kms_content_protection@lic.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl2/igt@kms_content_protection@lic.html

  * igt@kms_content_protection@srm:
    - shard-apl:          [FAIL][94] ([fdo#110321]) -> [DMESG-FAIL][95] ([fdo#110321] / [i915#95])
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_content_protection@srm.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl8/igt@kms_content_protection@srm.html

  * igt@kms_cursor_legacy@cursorb-vs-flipa-toggle:
    - shard-glk:          [DMESG-FAIL][96] ([i915#1925] / [i915#1926]) -> [DMESG-WARN][97] ([i915#1926])
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk4/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-glk4/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html

  * igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-render:
    - shard-apl:          [SKIP][98] ([fdo#109271]) -> [TIMEOUT][99] ([i915#1635])
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl6/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-render.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl4/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-render.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max:
    - shard-kbl:          [FAIL][100] ([fdo#108145] / [i915#265]) -> [FAIL][101] ([fdo#108145] / [i915#265] / [i915#93] / [i915#95])
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-kbl4/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max.html
    - shard-apl:          [FAIL][102] ([fdo#108145] / [i915#265]) -> [FAIL][103] ([fdo#108145] / [i915#265] / [i915#95])
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-apl6/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-max.html

  * igt@kms_psr2_su@page_flip:
    - shard-tglb:         [FAIL][104] ([i915#608]) -> [SKIP][105] ([i915#1911])
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb1/igt@kms_psr2_su@page_flip.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-tglb8/igt@kms_psr2_su@page_flip.html
    - shard-iclb:         [FAIL][106] ([i915#608]) -> [SKIP][107] ([i915#1911])
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb2/igt@kms_psr2_su@page_flip.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/shard-iclb2/igt@kms_psr2_su@page_flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#110321]: https://bugs.freedesktop.org/show_bug.cgi?id=110321
  [fdo#110336]: https://bugs.freedesktop.org/show_bug.cgi?id=110336
  [i915#1031]: https://gitlab.freedesktop.org/drm/intel/issues/1031
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1525]: https://gitlab.freedesktop.org/drm/intel/issues/1525
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1559]: https://gitlab.freedesktop.org/drm/intel/issues/1559
  [i915#1566]: https://gitlab.freedesktop.org/drm/intel/issues/1566
  [i915#1602]: https://gitlab.freedesktop.org/drm/intel/issues/1602
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#177]: https://gitlab.freedesktop.org/drm/intel/issues/177
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1899]: https://gitlab.freedesktop.org/drm/intel/issues/1899
  [i915#1911]: https://gitlab.freedesktop.org/drm/intel/issues/1911
  [i915#1925]: https://gitlab.freedesktop.org/drm/intel/issues/1925
  [i915#1926]: https://gitlab.freedesktop.org/drm/intel/issues/1926
  [i915#1930]: https://gitlab.freedesktop.org/drm/intel/issues/1930
  [i915#1958]: https://gitlab.freedesktop.org/drm/intel/issues/1958
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#433]: https://gitlab.freedesktop.org/drm/intel/issues/433
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#456]: https://gitlab.freedesktop.org/drm/intel/issues/456
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#608]: https://gitlab.freedesktop.org/drm/intel/issues/608
  [i915#64]: https://gitlab.freedesktop.org/drm/intel/issues/64
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#70]: https://gitlab.freedesktop.org/drm/intel/issues/70
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#83]: https://gitlab.freedesktop.org/drm/intel/issues/83
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 8)
------------------------------

  Missing    (3): pig-skl-6260u pig-glk-j5005 pig-icl-1065g7 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5690 -> IGTPW_4632
  * Piglit: piglit_4509 -> None

  CI-20190529: 20190529
  CI_DRM_8567: d36c7a9807541df70739a5917cbbab42fdf66a29 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4632: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/index.html
  IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4632/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for i915/gem_exec_schedule: Try to spot unfairness (rev3)
  2020-06-01 19:08 ` [igt-dev] " Chris Wilson
                   ` (7 preceding siblings ...)
  (?)
@ 2020-06-02  6:11 ` Patchwork
  -1 siblings, 0 replies; 22+ messages in thread
From: Patchwork @ 2020-06-02  6:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: i915/gem_exec_schedule: Try to spot unfairness (rev3)
URL   : https://patchwork.freedesktop.org/series/77887/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8567_full -> IGTPW_4633_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_4633_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_4633_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_4633_full:

### IGT changes ###

#### Possible regressions ####

  * {igt@gem_exec_schedule@fairness@bcs0} (NEW):
    - shard-iclb:         NOTRUN -> [INCOMPLETE][1] +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb8/igt@gem_exec_schedule@fairness@bcs0.html

  * {igt@gem_exec_schedule@fairness@rcs0} (NEW):
    - shard-iclb:         NOTRUN -> [FAIL][2] +1 similar issue
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb8/igt@gem_exec_schedule@fairness@rcs0.html

  * {igt@gem_exec_schedule@unfairness@bcs0} (NEW):
    - shard-kbl:          NOTRUN -> [FAIL][3] +4 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl6/igt@gem_exec_schedule@unfairness@bcs0.html
    - shard-glk:          NOTRUN -> [FAIL][4] +3 similar issues
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk1/igt@gem_exec_schedule@unfairness@bcs0.html
    - shard-tglb:         NOTRUN -> [INCOMPLETE][5] +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-tglb5/igt@gem_exec_schedule@unfairness@bcs0.html

  * {igt@gem_exec_schedule@unfairness@rcs0} (NEW):
    - shard-tglb:         NOTRUN -> [FAIL][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-tglb5/igt@gem_exec_schedule@unfairness@rcs0.html

  * {igt@gem_exec_schedule@unfairness@vcs0} (NEW):
    - shard-apl:          NOTRUN -> [FAIL][7] +3 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl7/igt@gem_exec_schedule@unfairness@vcs0.html

  * igt@kms_plane@plane-panning-top-left-pipe-c-planes:
    - shard-kbl:          [PASS][8] -> [INCOMPLETE][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl2/igt@kms_plane@plane-panning-top-left-pipe-c-planes.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl2/igt@kms_plane@plane-panning-top-left-pipe-c-planes.html

  
New tests
---------

  New tests have been introduced between CI_DRM_8567_full and IGTPW_4633_full:

### New IGT tests (12) ###

  * igt@gem_exec_schedule@fairness:
    - Statuses : 2 skip(s)
    - Exec time: [0.0] s

  * igt@gem_exec_schedule@fairness@bcs0:
    - Statuses : 2 incomplete(s) 3 pass(s)
    - Exec time: [0.0, 13.41] s

  * igt@gem_exec_schedule@fairness@rcs0:
    - Statuses : 1 fail(s) 4 pass(s)
    - Exec time: [3.35, 13.40] s

  * igt@gem_exec_schedule@fairness@vcs0:
    - Statuses : 3 pass(s)
    - Exec time: [13.22, 13.40] s

  * igt@gem_exec_schedule@fairness@vcs1:
    - Statuses : 1 pass(s)
    - Exec time: [13.23] s

  * igt@gem_exec_schedule@fairness@vecs0:
    - Statuses : 3 pass(s)
    - Exec time: [13.25, 13.36] s

  * igt@gem_exec_schedule@unfairness:
    - Statuses : 2 skip(s)
    - Exec time: [0.0] s

  * igt@gem_exec_schedule@unfairness@bcs0:
    - Statuses : 3 fail(s) 2 incomplete(s)
    - Exec time: [0.0, 15.31] s

  * igt@gem_exec_schedule@unfairness@rcs0:
    - Statuses : 5 fail(s)
    - Exec time: [3.98, 14.85] s

  * igt@gem_exec_schedule@unfairness@vcs0:
    - Statuses : 3 fail(s)
    - Exec time: [11.38, 15.42] s

  * igt@gem_exec_schedule@unfairness@vcs1:
    - Statuses : 1 fail(s)
    - Exec time: [15.39] s

  * igt@gem_exec_schedule@unfairness@vecs0:
    - Statuses : 3 fail(s)
    - Exec time: [11.37, 11.58] s

  

Known issues
------------

  Here are the changes found in IGTPW_4633_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_render_copy@x-tiled:
    - shard-glk:          [PASS][10] -> [FAIL][11] ([i915#1927])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk7/igt@gem_render_copy@x-tiled.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk8/igt@gem_render_copy@x-tiled.html

  * igt@i915_suspend@fence-restore-untiled:
    - shard-kbl:          [PASS][12] -> [INCOMPLETE][13] ([i915#155]) +1 similar issue
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl4/igt@i915_suspend@fence-restore-untiled.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl4/igt@i915_suspend@fence-restore-untiled.html

  * igt@kms_big_fb@x-tiled-64bpp-rotate-180:
    - shard-glk:          [PASS][14] -> [FAIL][15] ([i915#1119] / [i915#118] / [i915#95]) +1 similar issue
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk5/igt@kms_big_fb@x-tiled-64bpp-rotate-180.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk8/igt@kms_big_fb@x-tiled-64bpp-rotate-180.html

  * igt@kms_cursor_crc@pipe-a-cursor-256x85-offscreen:
    - shard-kbl:          [PASS][16] -> [FAIL][17] ([i915#54] / [i915#93] / [i915#95]) +3 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl2/igt@kms_cursor_crc@pipe-a-cursor-256x85-offscreen.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl6/igt@kms_cursor_crc@pipe-a-cursor-256x85-offscreen.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-apl:          [PASS][18] -> [DMESG-WARN][19] ([i915#180]) +2 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl1/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-a-256x256-top-edge:
    - shard-apl:          [PASS][20] -> [FAIL][21] ([i915#70] / [i915#95])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_cursor_edge_walk@pipe-a-256x256-top-edge.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl4/igt@kms_cursor_edge_walk@pipe-a-256x256-top-edge.html
    - shard-kbl:          [PASS][22] -> [FAIL][23] ([i915#70] / [i915#93] / [i915#95])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_cursor_edge_walk@pipe-a-256x256-top-edge.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl4/igt@kms_cursor_edge_walk@pipe-a-256x256-top-edge.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-glk:          [PASS][24] -> [FAIL][25] ([i915#72])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk2/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-pwrite:
    - shard-glk:          [PASS][26] -> [FAIL][27] ([i915#49])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk9/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-pwrite.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk4/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-pwrite.html

  * igt@kms_mmap_write_crc@main:
    - shard-kbl:          [PASS][28] -> [FAIL][29] ([i915#93] / [i915#95])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl2/igt@kms_mmap_write_crc@main.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl7/igt@kms_mmap_write_crc@main.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-kbl:          [PASS][30] -> [DMESG-WARN][31] ([i915#180])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_plane@plane-panning-top-left-pipe-a-planes:
    - shard-apl:          [PASS][32] -> [TIMEOUT][33] ([i915#1635]) +1 similar issue
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_plane@plane-panning-top-left-pipe-a-planes.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl4/igt@kms_plane@plane-panning-top-left-pipe-a-planes.html

  * igt@kms_plane_cursor@pipe-a-overlay-size-64:
    - shard-apl:          [PASS][34] -> [FAIL][35] ([i915#1559] / [i915#95])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_plane_cursor@pipe-a-overlay-size-64.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl6/igt@kms_plane_cursor@pipe-a-overlay-size-64.html
    - shard-kbl:          [PASS][36] -> [FAIL][37] ([i915#1559] / [i915#93] / [i915#95])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl1/igt@kms_plane_cursor@pipe-a-overlay-size-64.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl3/igt@kms_plane_cursor@pipe-a-overlay-size-64.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [PASS][38] -> [SKIP][39] ([fdo#109441]) +1 similar issue
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb7/igt@kms_psr@psr2_primary_page_flip.html

  
#### Possible fixes ####

  * {igt@gem_ctx_isolation@preservation-s3@rcs0}:
    - shard-kbl:          [DMESG-WARN][40] ([i915#180]) -> [PASS][41] +1 similar issue
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl6/igt@gem_ctx_isolation@preservation-s3@rcs0.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl4/igt@gem_ctx_isolation@preservation-s3@rcs0.html

  * igt@gem_exec_reloc@basic-cpu-read:
    - shard-glk:          [TIMEOUT][42] ([i915#1958]) -> [PASS][43] +2 similar issues
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk5/igt@gem_exec_reloc@basic-cpu-read.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk6/igt@gem_exec_reloc@basic-cpu-read.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][44] ([i915#1899]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb8/igt@i915_pm_dc@dc6-psr.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb3/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_big_fb@linear-64bpp-rotate-0:
    - shard-glk:          [FAIL][46] ([i915#1119] / [i915#118] / [i915#95]) -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk8/igt@kms_big_fb@linear-64bpp-rotate-0.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk1/igt@kms_big_fb@linear-64bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen:
    - shard-kbl:          [FAIL][48] ([i915#54] / [i915#93] / [i915#95]) -> [PASS][49] +3 similar issues
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-64x64-onscreen.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-apl:          [DMESG-WARN][50] ([i915#180]) -> [PASS][51]
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge:
    - shard-apl:          [FAIL][52] ([i915#70] / [i915#95]) -> [PASS][53]
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl8/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
    - shard-kbl:          [FAIL][54] ([i915#70] / [i915#93] / [i915#95]) -> [PASS][55]
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl1/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html

  * igt@kms_cursor_legacy@cursor-vs-flip-varying-size:
    - shard-tglb:         [INCOMPLETE][56] -> [PASS][57]
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb7/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-tglb3/igt@kms_cursor_legacy@cursor-vs-flip-varying-size.html

  * igt@kms_cursor_legacy@cursora-vs-flipb-toggle:
    - shard-glk:          [DMESG-FAIL][58] ([i915#1925] / [i915#1926]) -> [PASS][59]
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk1/igt@kms_cursor_legacy@cursora-vs-flipb-toggle.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk8/igt@kms_cursor_legacy@cursora-vs-flipb-toggle.html

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled:
    - shard-kbl:          [FAIL][60] ([i915#177] / [i915#52] / [i915#54] / [i915#93] / [i915#95]) -> [PASS][61]
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl6/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
    - shard-apl:          [FAIL][62] ([i915#52] / [i915#54] / [i915#95]) -> [PASS][63]
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl7/igt@kms_draw_crc@draw-method-xrgb8888-pwrite-untiled.html

  * igt@kms_draw_crc@fill-fb:
    - shard-kbl:          [FAIL][64] ([i915#52] / [i915#93] / [i915#95]) -> [PASS][65]
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl1/igt@kms_draw_crc@fill-fb.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl2/igt@kms_draw_crc@fill-fb.html
    - shard-apl:          [FAIL][66] ([i915#95]) -> [PASS][67]
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl8/igt@kms_draw_crc@fill-fb.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl1/igt@kms_draw_crc@fill-fb.html

  * igt@kms_fbcon_fbt@fbc:
    - shard-kbl:          [FAIL][68] ([i915#64] / [i915#93] / [i915#95]) -> [PASS][69]
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl3/igt@kms_fbcon_fbt@fbc.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl2/igt@kms_fbcon_fbt@fbc.html
    - shard-apl:          [FAIL][70] ([i915#1525] / [i915#95]) -> [PASS][71]
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_fbcon_fbt@fbc.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl1/igt@kms_fbcon_fbt@fbc.html

  * {igt@kms_flip@flip-vs-suspend-interruptible@b-hdmi-a1}:
    - shard-hsw:          [INCOMPLETE][72] ([i915#61]) -> [PASS][73]
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-hsw8/igt@kms_flip@flip-vs-suspend-interruptible@b-hdmi-a1.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-hsw8/igt@kms_flip@flip-vs-suspend-interruptible@b-hdmi-a1.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-apl:          [DMESG-WARN][74] ([i915#180] / [i915#95]) -> [PASS][75]
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl7/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_panel_fitting@atomic-fastset:
    - shard-tglb:         [FAIL][76] ([i915#83]) -> [PASS][77]
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb6/igt@kms_panel_fitting@atomic-fastset.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-tglb1/igt@kms_panel_fitting@atomic-fastset.html
    - shard-iclb:         [FAIL][78] ([i915#83]) -> [PASS][79]
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb4/igt@kms_panel_fitting@atomic-fastset.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb3/igt@kms_panel_fitting@atomic-fastset.html

  * igt@kms_psr@psr2_suspend:
    - shard-iclb:         [SKIP][80] ([fdo#109441]) -> [PASS][81] +2 similar issues
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb5/igt@kms_psr@psr2_suspend.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb2/igt@kms_psr@psr2_suspend.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][82] ([i915#588]) -> [SKIP][83] ([i915#658])
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb6/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][84] ([i915#1899]) -> [FAIL][85] ([i915#454])
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-tglb8/igt@i915_pm_dc@dc6-psr.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-tglb5/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_big_fb@yf-tiled-64bpp-rotate-270:
    - shard-apl:          [SKIP][86] ([fdo#109271]) -> [TIMEOUT][87] ([i915#1635]) +1 similar issue
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl6/igt@kms_big_fb@yf-tiled-64bpp-rotate-270.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl4/igt@kms_big_fb@yf-tiled-64bpp-rotate-270.html

  * igt@kms_content_protection@atomic:
    - shard-apl:          [TIMEOUT][88] ([i915#1319] / [i915#1635]) -> [TIMEOUT][89] ([i915#1319])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_content_protection@atomic.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl8/igt@kms_content_protection@atomic.html

  * igt@kms_content_protection@legacy:
    - shard-apl:          [TIMEOUT][90] ([i915#1319]) -> [TIMEOUT][91] ([i915#1319] / [i915#1635])
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl6/igt@kms_content_protection@legacy.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl6/igt@kms_content_protection@legacy.html

  * igt@kms_content_protection@lic:
    - shard-apl:          [TIMEOUT][92] ([i915#1319] / [i915#1635]) -> [FAIL][93] ([fdo#110321] / [i915#95])
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl8/igt@kms_content_protection@lic.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl8/igt@kms_content_protection@lic.html

  * igt@kms_content_protection@srm:
    - shard-apl:          [FAIL][94] ([fdo#110321]) -> [TIMEOUT][95] ([i915#1319] / [i915#1635])
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl4/igt@kms_content_protection@srm.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl6/igt@kms_content_protection@srm.html

  * igt@kms_content_protection@uevent:
    - shard-kbl:          [FAIL][96] ([i915#357]) -> [FAIL][97] ([i915#357] / [i915#93] / [i915#95])
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl7/igt@kms_content_protection@uevent.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl7/igt@kms_content_protection@uevent.html
    - shard-apl:          [FAIL][98] ([i915#357]) -> [FAIL][99] ([i915#357] / [i915#95])
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl1/igt@kms_content_protection@uevent.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl6/igt@kms_content_protection@uevent.html

  * igt@kms_cursor_legacy@cursorb-vs-flipa-toggle:
    - shard-glk:          [DMESG-FAIL][100] ([i915#1925] / [i915#1926]) -> [DMESG-WARN][101] ([i915#1926])
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-glk4/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-glk1/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-apl:          [FAIL][102] ([fdo#108145] / [i915#265]) -> [FAIL][103] ([fdo#108145] / [i915#265] / [i915#95])
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-apl6/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-apl7/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html
    - shard-kbl:          [FAIL][104] ([fdo#108145] / [i915#265]) -> [FAIL][105] ([fdo#108145] / [i915#265] / [i915#93] / [i915#95])
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-kbl6/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-kbl6/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [FAIL][106] ([i915#608]) -> [SKIP][107] ([fdo#109642] / [fdo#111068])
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8567/shard-iclb2/igt@kms_psr2_su@page_flip.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/shard-iclb7/igt@kms_psr2_su@page_flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110321]: https://bugs.freedesktop.org/show_bug.cgi?id=110321
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [i915#1119]: https://gitlab.freedesktop.org/drm/intel/issues/1119
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#1525]: https://gitlab.freedesktop.org/drm/intel/issues/1525
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#1559]: https://gitlab.freedesktop.org/drm/intel/issues/1559
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#177]: https://gitlab.freedesktop.org/drm/intel/issues/177
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1899]: https://gitlab.freedesktop.org/drm/intel/issues/1899
  [i915#1925]: https://gitlab.freedesktop.org/drm/intel/issues/1925
  [i915#1926]: https://gitlab.freedesktop.org/drm/intel/issues/1926
  [i915#1927]: https://gitlab.freedesktop.org/drm/intel/issues/1927
  [i915#1928]: https://gitlab.freedesktop.org/drm/intel/issues/1928
  [i915#1930]: https://gitlab.freedesktop.org/drm/intel/issues/1930
  [i915#1958]: https://gitlab.freedesktop.org/drm/intel/issues/1958
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#357]: https://gitlab.freedesktop.org/drm/intel/issues/357
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#608]: https://gitlab.freedesktop.org/drm/intel/issues/608
  [i915#61]: https://gitlab.freedesktop.org/drm/intel/issues/61
  [i915#64]: https://gitlab.freedesktop.org/drm/intel/issues/64
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#70]: https://gitlab.freedesktop.org/drm/intel/issues/70
  [i915#72]: https://gitlab.freedesktop.org/drm/intel/issues/72
  [i915#83]: https://gitlab.freedesktop.org/drm/intel/issues/83
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 8)
------------------------------

  Missing    (3): pig-skl-6260u pig-glk-j5005 pig-icl-1065g7 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5690 -> IGTPW_4633
  * Piglit: piglit_4509 -> None

  CI-20190529: 20190529
  CI_DRM_8567: d36c7a9807541df70739a5917cbbab42fdf66a29 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4633: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/index.html
  IGT_5690: bea881189520a9cccbb1c1cb454ac5b6fdaea40e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4633/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-12-16 15:24 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-12-16 15:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

v2: igt_assert_f and more commentary; exclude vip from client stats,
include range of frame intervals from each individual client
v3: Write down what the test actually does!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 797 +++++++++++++++++++++++++++++++++
 1 file changed, 797 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index dd15b2ac7..8be5539aa 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
 #include <sys/poll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <sched.h>
 #include <signal.h>
@@ -2532,12 +2533,250 @@ static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
 	return (x + y - 1) / y;
 }
 
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	int f = read_timestamp_frequency(i915);
+	if (intel_gen(intel_get_drm_devid(i915)) == 11)
+		f = 12500000; /* icl!!! are you feeling alright? CTX vs CS */
+	return div64_u64_round_up(ns * f, NSEC_PER_SEC);
+}
+
 static uint64_t ticks_to_ns(int i915, uint64_t ticks)
 {
 	return div64_u64_round_up(ticks * NSEC_PER_SEC,
 				  read_timestamp_frequency(i915));
 }
 
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	/* Loop until CTX_TIMESTAMP - initial > @ns */
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	/* delta = now - start; inverted to match COND_BBE */
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	/* Save delta for reading by COND_BBE */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	/* Break if delta [time elapsed] > ns */
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ctx_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Otherwise back to recalculating delta */
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { INC, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	/* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	/* Load the address + inc & mask variables */
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	/* Increment the [ring] address for saving CS_TIMESTAMP */
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	/* Rewrite the batch buffer for the next execution */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
 static int cmp_u32(const void *A, const void *B)
 {
 	const uint32_t *a = A, *b = B;
@@ -2550,6 +2789,560 @@ static int cmp_u32(const void *A, const void *B)
 		return 0;
 }
 
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+
+	if (gen == 8 && e->class == I915_ENGINE_CLASS_VIDEO)
+		return false; /* looks fubar */
+
+	return true;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	count = rand() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeline,
+		       uint32_t common,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *median,
+		       unsigned long *iqr)
+#define F_SYNC		(1 << 0)
+#define F_PACE		(1 << 1)
+#define F_FLOW		(1 << 2)
+#define F_HALF		(1 << 3)
+#define F_SOLO		(1 << 4)
+#define F_SPARE		(1 << 5)
+#define F_NEXT		(1 << 6)
+#define F_VIP		(1 << 7)
+#define F_RRUL		(1 << 8)
+#define F_SHARE		(1 << 9)
+#define F_PING		(1 << 10)
+#define F_THROTTLE	(1 << 11)
+#define F_ISOLATE	(1 << 12)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 obj[4] = {
+		{},
+		{
+			.handle = common ?: gem_create(i915, 4096),
+		},
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+	};
+	struct intel_execution_engine2 ping = *e;
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	int n;
+
+	srandom(getpid());
+	if (flags & F_PING)
+		ping = pick_random_engine(i915, e);
+	obj[0] = tslog_create(i915, ctx, &ping);
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 3,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			unsigned int seq;
+
+			seq = count;
+			if (flags & F_NEXT)
+				seq++;
+
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, seq);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+		close(execbuf.rsvd2);
+
+		execbuf.buffer_count = 1;
+		execbuf.batch_start_offset = 2048;
+		execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+		execbuf.rsvd2 = n_fence;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		if (flags & F_THROTTLE)
+			igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+		igt_swap(obj[2], obj[3]);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, obj[3].handle);
+	gem_close(i915, obj[2].handle);
+	if (obj[1].handle != common)
+		gem_close(i915, obj[1].handle);
+
+	gem_sync(i915, obj[0].handle);
+	if (median) {
+		uint32_t *map;
+
+		/*
+		 * We recorded the CS_TIMESTAMP of each frame, and if
+		 * the GPU is being shared completely fairly, we expect
+		 * each frame to be at the same interval from the last.
+		 *
+		 * Compute the interval between frames and report back
+		 * both the median interval and the range for this client.
+		 */
+
+		map = gem_mmap__device_coherent(i915, obj[0].handle,
+						0, 4096, PROT_WRITE);
+		for (n = 1; n < min(count, 512); n++) {
+			igt_assert(map[n]);
+			map[n - 1] = map[n] - map[n - 1];
+		}
+		qsort(map, --n, sizeof(*map), cmp_u32);
+		*iqr = ticks_to_ns(i915, map[(3 * n + 3) / 4] - map[n / 4]);
+		*median = ticks_to_ns(i915, map[n / 2]);
+		munmap(map, 4096);
+	}
+	gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t cpu_time = 0;
+
+	cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec tv = { .tv_nsec = delay_ns };
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result, *iqr;
+	uint32_t common = 0;
+
+	igt_require(has_ctx_timestamp(i915, e));
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(result != MAP_FAILED);
+	iqr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(iqr != MAP_FAILED);
+
+	/*
+	 * The combined workload always runs at a 60fps target (unless F_HALF!).
+	 * This gives a frame of interval of 16ms that is evenly split across
+	 * all the clients, so simulating a system with a bunch of clients that
+	 * are perfectly balanced and can sustain 60fps. Our job is to ensure
+	 * that each client does run at a smooth 60fps.
+	 *
+	 * Each client runs a fixed length delay loop (as a single request,
+	 * or split into 3) and then records the CS_TIMESTAMP after completing
+	 * its delay. Given a fair allotment of GPU time to each client,
+	 * that timestamp will [ideally] be at a precise 16ms intervals.
+	 * In practice, time is wasted on context switches, so as the number
+	 * of clients increases, the proprotion of time spent on context
+	 * switches grows. As we get to 64 render clients, we will be spending
+	 * as much time in context switches as executing the client workloads.
+	 *
+	 * Each client frame may be paced by some throttling technique found
+	 * in the wild. i.e. each client may wait until a simulated vblank
+	 * to indicate the start of a new frame, or it may wait until the
+	 * completion of a previous frame. This causes submission from each
+	 * client and across the system to be chunky and uneven.
+	 *
+	 * We look at the variation of frame intervals within each client, and
+	 * the variation of the medians across the clients to see if the
+	 * distribution (budget) of GPU time was fair enough.
+	 *
+	 * Alternative (and important) metrics will be more latency centric;
+	 * looking at how well we can sustain meeting deadline given competition
+	 * by clients for the GPU.
+	 */
+
+	for (int n = 2; n <= 256; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		struct timespec tv;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+
+		if (flags & F_PING) { /* fill the others with light bg load */
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL, NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx;
+
+			if (flags & F_ISOLATE) {
+				int clone, dmabuf = -1;
+
+				if (common)
+					dmabuf = prime_handle_to_fd(i915, common);
+
+				clone = gem_reopen_driver(i915);
+				gem_context_copy_engines(i915, 0, clone, 0);
+				i915 = clone;
+
+				if (dmabuf != -1)
+					common = prime_fd_to_handle(i915, dmabuf);
+			}
+
+			ctx = gem_context_clone_with_engines(i915, 0);
+
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child], &iqr[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/*
+		 * Are we running out of CPU time, and fail to submit frames?
+		 *
+		 * We try to rule out any undue impact on the GPU scheduling
+		 * from the CPU scheduler by looking for core saturation. If
+		 * we may be in a situation where the clients + kernel are
+		 * taking a whole core (think lockdep), then it is increasingly
+		 * likely that our measurements include delays from the CPU
+		 * scheduler. Err on the side of caution.
+		 */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		igt_debug("CPU usage: %.0f%%\n", 100. * cpu_time / d_time);
+		if (4 * cpu_time > 3 * d_time) {
+			if (nchild > 7) /* good enough to judge pass/fail */
+				break;
+
+			igt_skip_on_f(4 * cpu_time > 3 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		/* With no contention, we should match our target frametime */
+		if (nchild == 1) {
+			igt_assert(4 * result[0] > 3 * fence_ns &&
+				   3 * result[0] < 4 * fence_ns);
+			continue;
+		}
+
+		/*
+		 * The VIP should always be able to hit the target frame rate;
+		 * regardless of budget contention from lessor clients.
+		 */
+		if (flags & (F_VIP | F_RRUL)) {
+			igt_info("VIP interval %.2fms, range %.2fms\n",
+				 1e-6 * result[0], 1e-6 * iqr[0]);
+			igt_assert_f(4 * result[0] > 3 * fence_ns &&
+				     3 * result[0] < 4 * fence_ns,
+				     "VIP expects to run exactly when it wants, expects an interval of %.2fms, was %.2fms\n",
+				     1e-6 * fence_ns, 1e-6 * result[0]);
+			igt_assert_f(2 * iqr[0] < result[0],
+				     "VIP frame IQR %.2fms exceeded median threshold %.2fms\n",
+				     1e-6 * iqr[0],
+				     1e-6 * result[0] / 2);
+			if (!--nchild)
+				continue;
+
+			/* Exclude the VIP result from the plebian statistics */
+			memmove(result, result + 1, nchild * sizeof(*result));
+			memmove(iqr, iqr + 1, nchild * sizeof(*iqr));
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		qsort(iqr, nchild, sizeof(*iqr), cmp_ul);
+
+		/*
+		 * The target interval for median/mean is 16ms (fence_ns).
+		 * However, this work is evenly split across the clients so
+		 * the range (and median) of client medians may be much less
+		 * than 16ms [16/3N]. We present median of medians to try
+		 * and avoid any instability while running in CI; at the cost
+		 * of insensitivity!
+		 */
+		igt_info("%3d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f [%.1f, %.1f], mean: %.1f ± %.2f ms, cpu: %.0f%%\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * iqr[lo], 1e-6 * iqr[hi],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)),
+			 100. * cpu_time / d_time);
+
+		igt_assert_f(iqr[nchild / 2] < 2 * result[nchild / 2],
+			     "Child frame IQR %.2fms exceeded median threshold %.2fms\n",
+			     1e-6 * iqr[nchild / 2],
+			     1e-6 * result[nchild / 2] * 2);
+
+		igt_assert_f(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			     3 * igt_mean_get(&m) < 4 * result[nchild / 2],
+			     "Mean of client interval %.2fms differs from median %.2fms, distribution is skewed\n",
+
+			     1e-6 * igt_mean_get(&m), 1e-6 * result[nchild / 2]);
+
+		igt_assert_f(result[nchild / 2] > frame_ns / 2,
+			     "Median client interval %.2fms did not match target interval %.2fms\n",
+			     1e-6 * result[nchild / 2], 1e-6 * frame_ns);
+
+
+		igt_assert_f(result[hi] - result[lo] < result[nchild / 2],
+			     "Interquartile range of client intervals %.2fms is as large as the median threshold %.2fms, clients are not evenly distributed!\n",
+			     1e-6 * (result[hi] - result[lo]),
+			     1e-6 * result[nchild / 2]);
+
+		/* May be slowed due to sheer volume of context switches */
+		if (result[0] > 2 * fence_ns)
+			break;
+	}
+
+	munmap(iqr, 4096);
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} fair[] = {
+		/*
+		 * none - maximal greed in each client
+		 *
+		 * Push as many frames from each client as fast as possible
+		 */
+		{ "none",       0 },
+		{ "none-vip",   F_VIP }, /* one vip client must meet deadlines */
+		{ "none-solo",  F_SOLO }, /* 1 batch per frame per client */
+		{ "none-share", F_SHARE }, /* read from a common buffer */
+		{ "none-rrul",  F_RRUL }, /* "realtime-response under load" */
+		{ "none-ping",  F_PING }, /* measure inter-engine fairness */
+
+		/*
+		 * throttle - original per client throttling
+		 *
+		 * Used for front buffering rendering where there is no
+		 * extenal frame marker. Each client tries to only keep
+		 * 20ms of work submitted, though that measurement is
+		 * flawed...
+		 *
+		 * This is used by Xorg to try and maintain some resembalance
+		 * of input/output consistency when being feed a continuous
+		 * stream of X11 draw requests straight into scanout, where
+		 * the clients may submit the work faster than can be drawn.
+		 *
+		 * Throttling tracks requests per-file (and assumes that
+		 * all requests are in submission order across the whole file),
+		 * so we split each child to its own fd.
+		 */
+		{ "throttle",       F_THROTTLE | F_ISOLATE },
+		{ "throttle-vip",   F_THROTTLE | F_ISOLATE | F_VIP },
+		{ "throttle-solo",  F_THROTTLE | F_ISOLATE | F_SOLO },
+		{ "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+		{ "throttle-rrul",  F_THROTTLE | F_ISOLATE | F_RRUL },
+
+		/*
+		 * pace - mesa "submit double buffering"
+		 *
+		 * Submit a frame, wait for previous frame to start. This
+		 * prevents each client from getting too far ahead of its
+		 * rendering, maintaining a consistent input/output latency.
+		 */
+		{ "pace",       F_PACE },
+		{ "pace-solo",  F_PACE | F_SOLO },
+		{ "pace-share", F_PACE | F_SOLO | F_SHARE },
+		{ "pace-ping",  F_PACE | F_SOLO | F_SHARE | F_PING},
+
+		/* sync - only submit a frame at a time */
+		{ "sync",      F_SYNC },
+		{ "sync-vip",  F_SYNC | F_VIP },
+		{ "sync-solo", F_SYNC | F_SOLO },
+
+		/* flow - synchronise execution against the clock (vblank) */
+		{ "flow",       F_PACE | F_FLOW },
+		{ "flow-solo",  F_PACE | F_FLOW | F_SOLO },
+		{ "flow-share", F_PACE | F_FLOW | F_SHARE },
+		{ "flow-ping",  F_PACE | F_FLOW | F_SHARE | F_PING },
+
+		/* next - submit ahead of the clock (vblank double buffering) */
+		{ "next",       F_PACE | F_FLOW | F_NEXT },
+		{ "next-solo",  F_PACE | F_FLOW | F_NEXT | F_SOLO },
+		{ "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+		{ "next-ping",  F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+		/* spare - underutilise by a single client timeslice */
+		{ "spare",      F_PACE | F_FLOW | F_SPARE },
+		{ "spare-solo", F_PACE | F_FLOW | F_SPARE | F_SOLO },
+
+		/* half - run at half pace (submit 16ms of work every 32ms) */
+		{ "half",       F_PACE | F_FLOW | F_HALF },
+		{ "half-solo",  F_PACE | F_FLOW | F_HALF | F_SOLO },
+
+		{}
+	};
+
+	igt_fixture {
+		igt_info("CS timestamp frequency: %d\n",
+			 read_timestamp_frequency(i915));
+
+		igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	}
+
+	for (typeof(*fair) *f = fair; f->name; f++) {
+		igt_subtest_with_dynamic_f("fair-%s", f->name)  {
+			const struct intel_execution_engine2 *e;
+
+			__for_each_physical_engine(i915, e) {
+				if (!gem_class_can_store_dword(i915, e->class))
+					continue;
+
+				igt_dynamic_f("%s", e->name)
+					fairness(i915, e, timeout, f->flags);
+			}
+		}
+	}
+}
+
 static uint32_t read_ctx_timestamp(int i915,
 				   uint32_t ctx,
 				   const struct intel_execution_engine2 *e)
@@ -2789,6 +3582,10 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		igt_subtest_group {
+			test_fairness(fd, 2);
+		}
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.29.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-12-10  2:09 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-12-10  2:09 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

v2: igt_assert_f and more commentary; exclude vip from client stats,
include range of frame intervals from each individual client
v3: Write down what the test actually does!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 954 +++++++++++++++++++++++++++++++++
 1 file changed, 954 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index f23d63ac3..67cf88e72 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
 #include <sys/poll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <sched.h>
 #include <signal.h>
@@ -2516,6 +2517,926 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	int f = read_timestamp_frequency(i915);
+	if (intel_gen(intel_get_drm_devid(i915)) == 11)
+		f = 12500000; /* icl!!! are you feeling alright? CTX vs CS */
+	return div64_u64_round_up(ns * f, NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	/* Loop until CTX_TIMESTAMP - initial > @ns */
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	/* delta = now - start; inverted to match COND_BBE */
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	/* Save delta for reading by COND_BBE */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	/* Break if delta [time elapsed] > ns */
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ctx_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Otherwise back to recalculating delta */
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { INC, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	/* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	/* Load the address + inc & mask variables */
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	/* Increment the [ring] address for saving CS_TIMESTAMP */
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	/* Rewrite the batch buffer for the next execution */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+
+	if (gen == 8 && e->class == I915_ENGINE_CLASS_VIDEO)
+		return false; /* looks fubar */
+
+	return true;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	count = rand() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeline,
+		       uint32_t common,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *median,
+		       unsigned long *iqr)
+#define F_SYNC		(1 << 0)
+#define F_PACE		(1 << 1)
+#define F_FLOW		(1 << 2)
+#define F_HALF		(1 << 3)
+#define F_SOLO		(1 << 4)
+#define F_SPARE		(1 << 5)
+#define F_NEXT		(1 << 6)
+#define F_VIP		(1 << 7)
+#define F_RRUL		(1 << 8)
+#define F_SHARE		(1 << 9)
+#define F_PING		(1 << 10)
+#define F_THROTTLE	(1 << 11)
+#define F_ISOLATE	(1 << 12)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 obj[4] = {
+		{},
+		{
+			.handle = common ?: gem_create(i915, 4096),
+		},
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+	};
+	struct intel_execution_engine2 ping = *e;
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	int n;
+
+	srandom(getpid());
+	if (flags & F_PING)
+		ping = pick_random_engine(i915, e);
+	obj[0] = tslog_create(i915, ctx, &ping);
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 3,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			unsigned int seq;
+
+			seq = count;
+			if (flags & F_NEXT)
+				seq++;
+
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, seq);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+		close(execbuf.rsvd2);
+
+		execbuf.buffer_count = 1;
+		execbuf.batch_start_offset = 2048;
+		execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+		execbuf.rsvd2 = n_fence;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		if (flags & F_THROTTLE)
+			igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+		igt_swap(obj[2], obj[3]);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, obj[3].handle);
+	gem_close(i915, obj[2].handle);
+	if (obj[1].handle != common)
+		gem_close(i915, obj[1].handle);
+
+	gem_sync(i915, obj[0].handle);
+	if (median) {
+		uint32_t *map;
+
+		/*
+		 * We recorded the CS_TIMESTAMP of each frame, and if
+		 * the GPU is being shared completely fairly, we expect
+		 * each frame to be at the same interval from the last.
+		 *
+		 * Compute the interval between frames and report back
+		 * both the median interval and the range for this client.
+		 */
+
+		map = gem_mmap__device_coherent(i915, obj[0].handle,
+						0, 4096, PROT_WRITE);
+		for (n = 1; n < min(count, 512); n++) {
+			igt_assert(map[n]);
+			map[n - 1] = map[n] - map[n - 1];
+		}
+		qsort(map, --n, sizeof(*map), cmp_u32);
+		*iqr = ticks_to_ns(i915, map[(3 * n + 3) / 4] - map[n / 4]);
+		*median = ticks_to_ns(i915, map[n / 2]);
+		munmap(map, 4096);
+	}
+	gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t cpu_time = 0;
+
+	cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec tv = { .tv_nsec = delay_ns };
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result, *iqr;
+	uint32_t common = 0;
+
+	igt_require(has_ctx_timestamp(i915, e));
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(result != MAP_FAILED);
+	iqr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(iqr != MAP_FAILED);
+
+	/*
+	 * The combined workload always runs at a 60fps target (unless F_HALF!).
+	 * This gives a frame of interval of 16ms that is evenly split across
+	 * all the clients, so simulating a system with a bunch of clients that
+	 * are perfectly balanced and can sustain 60fps. Our job is to ensure
+	 * that each client does run at a smooth 60fps.
+	 *
+	 * Each client runs a fixed length delay loop (as a single request,
+	 * or split into 3) and then records the CS_TIMESTAMP after completing
+	 * its delay. Given a fair allotment of GPU time to each client,
+	 * that timestamp will [ideally] be at a precise 16ms intervals.
+	 * In practice, time is wasted on context switches, so as the number
+	 * of clients increases, the proprotion of time spent on context
+	 * switches grows. As we get to 64 render clients, we will be spending
+	 * as much time in context switches as executing the client workloads.
+	 *
+	 * Each client frame may be paced by some throttling technique found
+	 * in the wild. i.e. each client may wait until a simulated vblank
+	 * to indicate the start of a new frame, or it may wait until the
+	 * completion of a previous frame. This causes submission from each
+	 * client and across the system to be chunky and uneven.
+	 *
+	 * We look at the variation of frame intervals within each client, and
+	 * the variation of the medians across the clients to see if the
+	 * distribution (budget) of GPU time was fair enough.
+	 *
+	 * Alternative (and important) metrics will be more latency centric;
+	 * looking at how well we can sustain meeting deadline given competition
+	 * by clients for the GPU.
+	 */
+
+	for (int n = 2; n <= 256; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		struct timespec tv;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+
+		if (flags & F_PING) { /* fill the others with light bg load */
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL, NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx;
+
+			if (flags & F_ISOLATE) {
+				int clone, dmabuf = -1;
+
+				if (common)
+					dmabuf = prime_handle_to_fd(i915, common);
+
+				clone = gem_reopen_driver(i915);
+				gem_context_copy_engines(i915, 0, clone, 0);
+				i915 = clone;
+
+				if (dmabuf != -1)
+					common = prime_fd_to_handle(i915, dmabuf);
+			}
+
+			ctx = gem_context_clone_with_engines(i915, 0);
+
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child], &iqr[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/*
+		 * Are we running out of CPU time, and fail to submit frames?
+		 *
+		 * We try to rule out any undue impact on the GPU scheduling
+		 * from the CPU scheduler by looking for core saturation. If
+		 * we may be in a situation where the clients + kernel are
+		 * taking a whole core (think lockdep), then it is increasingly
+		 * likely that our measurements include delays from the CPU
+		 * scheduler. Err on the side of caution.
+		 */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		igt_debug("CPU usage: %.0f%%\n", 100. * cpu_time / d_time);
+		if (4 * cpu_time > 3 * d_time) {
+			if (nchild > 7) /* good enough to judge pass/fail */
+				break;
+
+			igt_skip_on_f(4 * cpu_time > 3 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		/* With no contention, we should match our target frametime */
+		if (nchild == 1) {
+			igt_assert(4 * result[0] > 3 * fence_ns &&
+				   3 * result[0] < 4 * fence_ns);
+			continue;
+		}
+
+		/*
+		 * The VIP should always be able to hit the target frame rate;
+		 * regardless of budget contention from lessor clients.
+		 */
+		if (flags & (F_VIP | F_RRUL)) {
+			igt_info("VIP interval %.2fms, range %.2fms\n",
+				 1e-6 * result[0], 1e-6 * iqr[0]);
+			igt_assert_f(4 * result[0] > 3 * fence_ns &&
+				     3 * result[0] < 4 * fence_ns,
+				     "VIP expects to run exactly when it wants, expects an interval of %.2fms, was %.2fms\n",
+				     1e-6 * fence_ns, 1e-6 * result[0]);
+			igt_assert_f(2 * iqr[0] < result[0],
+				     "VIP frame IQR %.2fms exceeded median threshold %.2fms\n",
+				     1e-6 * iqr[0],
+				     1e-6 * result[0] / 2);
+			if (!--nchild)
+				continue;
+
+			/* Exclude the VIP result from the plebian statistics */
+			memmove(result, result + 1, nchild * sizeof(*result));
+			memmove(iqr, iqr + 1, nchild * sizeof(*iqr));
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		qsort(iqr, nchild, sizeof(*iqr), cmp_ul);
+
+		/*
+		 * The target interval for median/mean is 16ms (fence_ns).
+		 * However, this work is evenly split across the clients so
+		 * the range (and median) of client medians may be much less
+		 * than 16ms [16/3N]. We present median of medians to try
+		 * and avoid any instability while running in CI; at the cost
+		 * of insensitivity!
+		 */
+		igt_info("%3d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f [%.1f, %.1f], mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * iqr[lo], 1e-6 * iqr[hi],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		igt_assert_f(iqr[nchild / 2] < 2 * result[nchild / 2],
+			     "Child frame IQR %.2fms exceeded median threshold %.2fms\n",
+			     1e-6 * iqr[nchild / 2],
+			     1e-6 * result[nchild / 2] * 2);
+
+		igt_assert_f(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			     3 * igt_mean_get(&m) < 4 * result[nchild / 2],
+			     "Mean of client interval %.2fms differs from median %.2fms, distribution is skewed\n",
+
+			     1e-6 * igt_mean_get(&m), 1e-6 * result[nchild / 2]);
+
+		igt_assert_f(2 * (result[hi] - result[lo]) < result[nchild / 2],
+			     "Interquartile range of client intervals %.2fms is as large as the median threshold %.2fms, clients are not evenly distributed!\n",
+			     1e-6 * (result[hi] - result[lo]),
+			     1e-6 * result[nchild / 2] / 2);
+
+		/* May be slowed due to sheer volume of context switches */
+		if (result[0] > 2 * fence_ns)
+			break;
+	}
+
+	munmap(iqr, 4096);
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} fair[] = {
+		/*
+		 * none - maximal greed in each client
+		 *
+		 * Push as many frames from each client as fast as possible
+		 */
+		{ "none",       0 },
+		{ "none-vip",   F_VIP }, /* one vip client must meet deadlines */
+		{ "none-solo",  F_SOLO }, /* 1 batch per frame per client */
+		{ "none-share", F_SHARE }, /* read from a common buffer */
+		{ "none-rrul",  F_RRUL }, /* "realtime-response under load" */
+		{ "none-ping",  F_PING }, /* measure inter-engine fairness */
+
+		/*
+		 * throttle - original per client throttling
+		 *
+		 * Used for front buffering rendering where there is no
+		 * extenal frame marker. Each client tries to only keep
+		 * 20ms of work submitted, though that measurement is
+		 * flawed...
+		 *
+		 * This is used by Xorg to try and maintain some resembalance
+		 * of input/output consistency when being feed a continuous
+		 * stream of X11 draw requests straight into scanout, where
+		 * the clients may submit the work faster than can be drawn.
+		 *
+		 * Throttling tracks requests per-file (and assumes that
+		 * all requests are in submission order across the whole file),
+		 * so we split each child to its own fd.
+		 */
+		{ "throttle",       F_THROTTLE | F_ISOLATE },
+		{ "throttle-vip",   F_THROTTLE | F_ISOLATE | F_VIP },
+		{ "throttle-solo",  F_THROTTLE | F_ISOLATE | F_SOLO },
+		{ "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+		{ "throttle-rrul",  F_THROTTLE | F_ISOLATE | F_RRUL },
+
+		/*
+		 * pace - mesa "submit double buffering"
+		 *
+		 * Submit a frame, wait for previous frame to start. This
+		 * prevents each client from getting too far ahead of its
+		 * rendering, maintaining a consistent input/output latency.
+		 */
+		{ "pace",       F_PACE },
+		{ "pace-solo",  F_PACE | F_SOLO },
+		{ "pace-share", F_PACE | F_SOLO | F_SHARE },
+		{ "pace-ping",  F_PACE | F_SOLO | F_SHARE | F_PING},
+
+		/* sync - only submit a frame at a time */
+		{ "sync",      F_SYNC },
+		{ "sync-vip",  F_SYNC | F_VIP },
+		{ "sync-solo", F_SYNC | F_SOLO },
+
+		/* flow - synchronise execution against the clock (vblank) */
+		{ "flow",       F_PACE | F_FLOW },
+		{ "flow-solo",  F_PACE | F_FLOW | F_SOLO },
+		{ "flow-share", F_PACE | F_FLOW | F_SHARE },
+		{ "flow-ping",  F_PACE | F_FLOW | F_SHARE | F_PING },
+
+		/* next - submit ahead of the clock (vblank double buffering) */
+		{ "next",       F_PACE | F_FLOW | F_NEXT },
+		{ "next-solo",  F_PACE | F_FLOW | F_NEXT | F_SOLO },
+		{ "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+		{ "next-ping",  F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+		/* spare - underutilise by a single client timeslice */
+		{ "spare",      F_PACE | F_FLOW | F_SPARE },
+		{ "spare-solo", F_PACE | F_FLOW | F_SPARE | F_SOLO },
+
+		/* half - run at half pace (submit 16ms of work every 32ms) */
+		{ "half",       F_PACE | F_FLOW | F_HALF },
+		{ "half-solo",  F_PACE | F_FLOW | F_HALF | F_SOLO },
+
+		{}
+	};
+
+	igt_fixture {
+		igt_info("CS timestamp frequency: %d\n",
+			 read_timestamp_frequency(i915));
+
+		igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	}
+
+	for (typeof(*fair) *f = fair; f->name; f++) {
+		igt_subtest_with_dynamic_f("fair-%s", f->name)  {
+			const struct intel_execution_engine2 *e;
+
+			__for_each_physical_engine(i915, e) {
+				if (!gem_class_can_store_dword(i915, e->class))
+					continue;
+
+				igt_dynamic_f("%s", e->name)
+					fairness(i915, e, timeout, f->flags);
+			}
+		}
+	}
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+				   uint32_t ctx,
+				   const struct intel_execution_engine2 *e)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+#define RUNTIME (base + 0x3a8)
+	uint32_t *map, *cs;
+	uint32_t ts;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, obj.handle,
+					     0, 4096, PROT_WRITE);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = RUNTIME;
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj.handle;
+	reloc.presumed_offset = obj.offset;
+	reloc.offset = offset_in_page(cs);
+	reloc.delta = 4000;
+	*cs++ = obj.offset + 4000;
+	*cs++ = obj.offset >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	ts = map[1000];
+	munmap(map, 4096);
+
+	return ts;
+}
+
+static void fairslice(int i915,
+		      const struct intel_execution_engine2 *e,
+		      unsigned long flags)
+{
+	igt_spin_t *spin = NULL;
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = gem_context_clone_with_engines(i915, 0);
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915,
+					      .ctx = ctx[i],
+					      .engine = e->flags,
+					      .flags = flags);
+		} else {
+			struct drm_i915_gem_execbuffer2 eb = {
+				.buffer_count = 1,
+				.buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+				.flags = e->flags,
+				.rsvd1 = ctx[i],
+			};
+			gem_execbuf(i915, &eb);
+		}
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	igt_assert(gem_bo_busy(i915, spin->handle));
+	igt_spin_end(spin);
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+	igt_spin_free(i915, spin);
+
+	qsort(ts, 3, sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[1]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert_f(ts[2], "CTX_TIMESTAMP not reported!\n");
+	igt_assert_f((ts[2] - ts[0]) * 6 < ts[1],
+		     "Range of timeslices greater than tolerable: %.2fms > %.2fms; unfair!\n",
+		     1e-6 * ticks_to_ns(i915, ts[2] - ts[0]),
+		     1e-6 * ticks_to_ns(i915, ts[1]) / 6);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2582,6 +3503,35 @@ igt_main
 		test_each_engine("lateslice", fd, e)
 			lateslice(fd, e->flags);
 
+		igt_subtest_group {
+			igt_fixture {
+				igt_require(gem_scheduler_has_semaphores(fd));
+				igt_require(gem_scheduler_has_preemption(fd));
+				igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+			}
+
+			test_each_engine("fairslice", fd, e)
+				fairslice(fd, e, 0);
+
+			test_each_engine("u-fairslice", fd, e)
+				fairslice(fd, e, IGT_SPIN_USERPTR);
+
+			igt_subtest("fairslice-all")  {
+				__for_each_physical_engine(fd, e) {
+					igt_fork(child, 1)
+						fairslice(fd, e, 0);
+				}
+				igt_waitchildren();
+			}
+			igt_subtest("u-fairslice-all")  {
+				__for_each_physical_engine(fd, e) {
+					igt_fork(child, 1)
+						fairslice(fd, e, IGT_SPIN_USERPTR);
+				}
+				igt_waitchildren();
+			}
+		}
+
 		test_each_engine("submit-early-slice", fd, e)
 			submit_slice(fd, e, EARLY_SUBMIT);
 		test_each_engine("submit-golden-slice", fd, e)
@@ -2610,6 +3560,10 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		igt_subtest_group {
+			test_fairness(fd, 2);
+		}
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.29.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-11-24 23:39 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-11-24 23:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 847 +++++++++++++++++++++++++++++++++
 1 file changed, 847 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index f23d63ac3..d888efcd7 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
 #include <sys/poll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <sched.h>
 #include <signal.h>
@@ -2516,6 +2517,819 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	int f = read_timestamp_frequency(i915);
+	if (intel_gen(intel_get_drm_devid(i915)) == 11)
+		f = 12500000; /* icl!!! are you feeling alright? CTX vs CS */
+	return div64_u64_round_up(ns * f, NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	/* Loop until CTX_TIMESTAMP - initial > @ns */
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	/* delta = now - start; inverted to match COND_BBE */
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	/* Save delta for reading by COND_BBE */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	/* Break if delta [time elapsed] > ns */
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ctx_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Otherwise back to recalculating delta */
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { INC, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	/* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	/* Load the address + inc & mask variables */
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	/* Increment the [ring] address for saving CS_TIMESTAMP */
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	/* Rewrite the batch buffer for the next execution */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+
+	if (gen == 8 && e->class == I915_ENGINE_CLASS_VIDEO)
+		return false; /* looks fubar */
+
+	return true;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	count = rand() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeline,
+		       uint32_t common,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC		(1 << 0)
+#define F_PACE		(1 << 1)
+#define F_FLOW		(1 << 2)
+#define F_HALF		(1 << 3)
+#define F_SOLO		(1 << 4)
+#define F_SPARE		(1 << 5)
+#define F_NEXT		(1 << 6)
+#define F_VIP		(1 << 7)
+#define F_RRUL		(1 << 8)
+#define F_SHARE		(1 << 9)
+#define F_PING		(1 << 10)
+#define F_THROTTLE	(1 << 11)
+#define F_ISOLATE	(1 << 12)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 obj[4] = {
+		{},
+		{
+			.handle = common ?: gem_create(i915, 4096),
+		},
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+	};
+	struct intel_execution_engine2 ping = *e;
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	int n;
+
+	srandom(getpid());
+	if (flags & F_PING)
+		ping = pick_random_engine(i915, e);
+	obj[0] = tslog_create(i915, ctx, &ping);
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 4,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			unsigned int seq;
+
+			seq = count;
+			if (flags & F_NEXT)
+				seq++;
+
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, seq);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+		close(execbuf.rsvd2);
+
+		execbuf.buffer_count = 1;
+		execbuf.batch_start_offset = 2048;
+		execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+		execbuf.rsvd2 = n_fence;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		if (flags & F_THROTTLE)
+			igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+		igt_swap(obj[2], obj[3]);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, obj[3].handle);
+	gem_close(i915, obj[2].handle);
+	if (obj[1].handle != common)
+		gem_close(i915, obj[1].handle);
+
+	gem_sync(i915, obj[0].handle);
+	if (out) {
+		uint32_t *map;
+
+		map = gem_mmap__device_coherent(i915, obj[0].handle,
+						0, 4096, PROT_WRITE);
+		for (n = 1; n < min(count, 512); n++) {
+			igt_assert(map[n]);
+			map[n - 1] = map[n] - map[n - 1];
+		}
+		qsort(map, --n, sizeof(*map), cmp_u32);
+		*out = ticks_to_ns(i915, map[n / 2]);
+		munmap(map, 4096);
+	}
+	gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t cpu_time = 0;
+
+	cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec tv = { .tv_nsec = delay_ns };
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+	uint32_t common = 0;
+
+	igt_require(has_ctx_timestamp(i915, e));
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		unsigned long vip = -1;
+		struct timespec tv;
+		struct igt_mean m;
+
+		if (flags & F_PING) {
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx;
+
+			if (flags & F_ISOLATE) {
+				int clone, dmabuf = -1;
+
+				if (common)
+					dmabuf = prime_handle_to_fd(i915, common);
+
+				clone = gem_reopen_driver(i915);
+				gem_context_copy_engines(i915, 0, clone, 0);
+				i915 = clone;
+
+				if (dmabuf != -1)
+					common = prime_fd_to_handle(i915, dmabuf);
+			}
+
+			ctx = gem_context_clone_with_engines(i915, 0);
+
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/* Are we running out of CPU time, and fail to submit frames? */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		if (10 * cpu_time > 9 * d_time) {
+			if (nchild > 7)
+				break;
+
+			igt_skip_on_f(10 * cpu_time > 9 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		if (flags & (F_VIP | F_RRUL))
+			vip = result[0];
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%2d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		if (vip != -1) {
+			igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+			igt_assert(4 * vip > 3 * fence_ns &&
+				   3 * vip < 4 * fence_ns);
+		}
+
+		/* May be slowed due to sheer volume of context switches */
+		igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+			       igt_mean_get(&m) < 3 * fence_ns);
+
+		igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			   3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+		igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} fair[] = {
+		/*
+		 * none - maximal greed in each client
+		 *
+		 * Push as many frames from each client as fast as possible
+		 */
+		{ "none",       0 },
+		{ "none-vip",   F_VIP }, /* one vip client must meet deadlines */
+		{ "none-solo",  F_SOLO }, /* 1 batch per frame per client */
+		{ "none-share", F_SHARE }, /* read from a common buffer */
+		{ "none-rrul",  F_RRUL }, /* "realtime-response under load" */
+		{ "none-ping",  F_PING }, /* measure inter-engine fairness */
+
+		/*
+		 * throttle - original per client throttling
+		 *
+		 * Used for front buffering rendering where there is no
+		 * extenal frame marker. Each client tries to only keep
+		 * 20ms of work submitted, though that measurement is
+		 * flawed...
+		 *
+		 * This is used by Xorg to try and maintain some resembalance
+		 * of input/output consistency when being feed a continuous
+		 * stream of X11 draw requests straight into scanout, where
+		 * the clients may submit the work faster than can be drawn.
+		 *
+		 * Throttling tracks requests per-file (and assumes that
+		 * all requests are in submission order across the whole file),
+		 * so we split each child to its own fd.
+		 */
+		{ "throttle",       F_THROTTLE | F_ISOLATE },
+		{ "throttle-vip",   F_THROTTLE | F_ISOLATE | F_VIP },
+		{ "throttle-solo",  F_THROTTLE | F_ISOLATE | F_SOLO },
+		{ "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+		{ "throttle-rrul",  F_THROTTLE | F_ISOLATE | F_RRUL },
+
+		/*
+		 * pace - mesa "submit double buffering"
+		 *
+		 * Submit a frame, wait for previous frame to start. This
+		 * prevents each client from getting too far ahead of its
+		 * rendering, maintaining a consistent input/output latency.
+		 */
+		{ "pace",       F_PACE },
+		{ "pace-solo",  F_PACE | F_SOLO},
+		{ "pace-share", F_PACE | F_SHARE},
+		{ "pace-ping",  F_PACE | F_SHARE | F_PING},
+
+		/* sync - only submit a frame at a time */
+		{ "sync",      F_SYNC },
+		{ "sync-vip",  F_SYNC | F_VIP },
+		{ "sync-solo", F_SYNC | F_SOLO },
+
+		/* flow - synchronise execution against the clock (vblank) */
+		{ "flow",       F_PACE | F_FLOW },
+		{ "flow-share", F_PACE | F_FLOW | F_SHARE },
+		{ "flow-ping",  F_PACE | F_FLOW | F_SHARE | F_PING },
+
+		/* next - submit ahead of the clock (vblank double buffering) */
+		{ "next",       F_PACE | F_FLOW | F_NEXT },
+		{ "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+		{ "next-ping",  F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+		/* spare - underutilise by a single client timeslice */
+		{ "spare", F_PACE | F_FLOW | F_SPARE },
+
+		/* half - run at half pace (submit 16ms of work every 32ms) */
+		{ "half",  F_PACE | F_FLOW | F_HALF },
+
+		{}
+	};
+
+	igt_fixture {
+		igt_info("CS timestamp frequency: %d\n",
+			 read_timestamp_frequency(i915));
+
+		igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	}
+
+	for (typeof(*fair) *f = fair; f->name; f++) {
+		igt_subtest_with_dynamic_f("fair-%s", f->name)  {
+			const struct intel_execution_engine2 *e;
+
+			__for_each_physical_engine(i915, e) {
+				if (!gem_class_can_store_dword(i915, e->class))
+					continue;
+
+				igt_dynamic_f("%s", e->name)
+					fairness(i915, e, timeout, f->flags);
+			}
+		}
+	}
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+				   uint32_t ctx,
+				   const struct intel_execution_engine2 *e)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+#define RUNTIME (base + 0x3a8)
+	uint32_t *map, *cs;
+	uint32_t ts;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, obj.handle,
+					     0, 4096, PROT_WRITE);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = RUNTIME;
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj.handle;
+	reloc.presumed_offset = obj.offset;
+	reloc.offset = offset_in_page(cs);
+	reloc.delta = 4000;
+	*cs++ = obj.offset + 4000;
+	*cs++ = obj.offset >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	ts = map[1000];
+	munmap(map, 4096);
+
+	return ts;
+}
+
+static void fairslice(int i915,
+		      const struct intel_execution_engine2 *e,
+		      unsigned long flags)
+{
+	igt_spin_t *spin = NULL;
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = gem_context_clone_with_engines(i915, 0);
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915,
+					      .ctx = ctx[i],
+					      .engine = e->flags,
+					      .flags = flags);
+		} else {
+			struct drm_i915_gem_execbuffer2 eb = {
+				.buffer_count = 1,
+				.buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+				.flags = e->flags,
+				.rsvd1 = ctx[i],
+			};
+			gem_execbuf(i915, &eb);
+		}
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	igt_assert(gem_bo_busy(i915, spin->handle));
+	igt_spin_end(spin);
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+		gem_context_destroy(i915, ctx[i]);
+	igt_spin_free(i915, spin);
+
+	qsort(ts, 3, sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert(ts[0] && ts[2] > ts[0]);
+	igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2582,6 +3396,35 @@ igt_main
 		test_each_engine("lateslice", fd, e)
 			lateslice(fd, e->flags);
 
+		igt_subtest_group {
+			igt_fixture {
+				igt_require(gem_scheduler_has_semaphores(fd));
+				igt_require(gem_scheduler_has_preemption(fd));
+				igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+			}
+
+			test_each_engine("fairslice", fd, e)
+				fairslice(fd, e, 0);
+
+			test_each_engine("u-fairslice", fd, e)
+				fairslice(fd, e, IGT_SPIN_USERPTR);
+
+			igt_subtest("fairslice-all")  {
+				__for_each_physical_engine(fd, e) {
+					igt_fork(child, 1)
+						fairslice(fd, e, 0);
+				}
+				igt_waitchildren();
+			}
+			igt_subtest("u-fairslice-all")  {
+				__for_each_physical_engine(fd, e) {
+					igt_fork(child, 1)
+						fairslice(fd, e, IGT_SPIN_USERPTR);
+				}
+				igt_waitchildren();
+			}
+		}
+
 		test_each_engine("submit-early-slice", fd, e)
 			submit_slice(fd, e, EARLY_SUBMIT);
 		test_each_engine("submit-golden-slice", fd, e)
@@ -2610,6 +3453,10 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		igt_subtest_group {
+			test_fairness(fd, 2);
+		}
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.29.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-08-03 13:57 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-08-03 13:57 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 816 +++++++++++++++++++++++++++++++++
 1 file changed, 816 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 488d93511..7c8ea6d70 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
 #include <sys/poll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <sched.h>
 #include <signal.h>
@@ -2503,6 +2504,800 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	/* Loop until CTX_TIMESTAMP - initial > @ns */
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	/* delta = now - start; inverted to match COND_BBE */
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	/* Save delta for reading by COND_BBE */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	/* Break if delta > ns */
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Otherwise back to recalculating delta */
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { INC, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	/* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	/* Load the address + inc & mask variables */
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	/* Increment the [ring] address for saving CS_TIMESTAMP */
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	/* Rewrite the batch buffer for the next execution */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	obj.offset = obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+
+	if (gen == 8 && e->class == I915_ENGINE_CLASS_VIDEO)
+		return false; /* looks fubar */
+
+	return true;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	count = rand() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeline,
+		       uint32_t common,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC		(1 << 0)
+#define F_PACE		(1 << 1)
+#define F_FLOW		(1 << 2)
+#define F_HALF		(1 << 3)
+#define F_SOLO		(1 << 4)
+#define F_SPARE		(1 << 5)
+#define F_NEXT		(1 << 6)
+#define F_VIP		(1 << 7)
+#define F_RRUL		(1 << 8)
+#define F_SHARE		(1 << 9)
+#define F_PING		(1 << 10)
+#define F_THROTTLE	(1 << 11)
+#define F_ISOLATE	(1 << 12)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 obj[4] = {
+		{},
+		{
+			.handle = common ?: gem_create(i915, 4096),
+		},
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+	};
+	struct intel_execution_engine2 ping = *e;
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	int n;
+
+	srandom(getpid());
+	if (flags & F_PING)
+		ping = pick_random_engine(i915, e);
+	obj[0] = tslog_create(i915, ctx, &ping);
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 4,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			unsigned int seq;
+
+			seq = count;
+			if (flags & F_NEXT)
+				seq++;
+
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, seq);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+		close(execbuf.rsvd2);
+
+		execbuf.buffer_count = 1;
+		execbuf.batch_start_offset = 2048;
+		execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+		execbuf.rsvd2 = n_fence;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		if (flags & F_THROTTLE)
+			igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+		igt_swap(obj[2], obj[3]);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, obj[3].handle);
+	gem_close(i915, obj[2].handle);
+	if (obj[1].handle != common)
+		gem_close(i915, obj[1].handle);
+
+	gem_sync(i915, obj[0].handle);
+	if (out) {
+		uint32_t *map;
+
+		map = gem_mmap__device_coherent(i915, obj[0].handle,
+						0, 4096, PROT_WRITE);
+		for (n = 1; n < min(count, 512); n++) {
+			igt_assert(map[n]);
+			map[n - 1] = map[n] - map[n - 1];
+		}
+		qsort(map, --n, sizeof(*map), cmp_u32);
+		*out = ticks_to_ns(i915, map[n / 2]);
+		munmap(map, 4096);
+	}
+	gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t cpu_time = 0;
+
+	cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec tv = { .tv_nsec = delay_ns };
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+	uint32_t common = 0;
+
+	igt_require(has_ctx_timestamp(i915, e));
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		unsigned long vip = -1;
+		struct timespec tv;
+		struct igt_mean m;
+
+		if (flags & F_PING) {
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx;
+
+			if (flags & F_ISOLATE) {
+				int clone, dmabuf = -1;
+
+				if (common)
+					dmabuf = prime_handle_to_fd(i915, common);
+
+				clone = gem_reopen_driver(i915);
+				gem_context_copy_engines(i915, 0, clone, 0);
+				i915 = clone;
+
+				if (dmabuf != -1)
+					common = prime_fd_to_handle(i915, dmabuf);
+			}
+
+			ctx = gem_context_clone_with_engines(i915, 0);
+
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/* Are we running out of CPU time, and fail to submit frames? */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		if (10 * cpu_time > 9 * d_time) {
+			if (nchild > 7)
+				break;
+
+			igt_skip_on_f(10 * cpu_time > 9 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		if (flags & (F_VIP | F_RRUL))
+			vip = result[0];
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%2d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		if (vip != -1) {
+			igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+			igt_assert(4 * vip > 3 * fence_ns &&
+				   3 * vip < 4 * fence_ns);
+		}
+
+		/* May be slowed due to sheer volume of context switches */
+		igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+			       igt_mean_get(&m) < 3 * fence_ns);
+
+		igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			   3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+		igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} fair[] = {
+		/*
+		 * none - maximal greed in each client
+		 *
+		 * Push as many frames from each client as fast as possible
+		 */
+		{ "none",       0 },
+		{ "none-vip",   F_VIP }, /* one vip client must meet deadlines */
+		{ "none-solo",  F_SOLO }, /* 1 batch per frame per client */
+		{ "none-share", F_SHARE }, /* read from a common buffer */
+		{ "none-rrul",  F_RRUL }, /* "realtime-response under load" */
+		{ "none-ping",  F_PING }, /* measure inter-engine fairness */
+
+		/*
+		 * throttle - original per client throttling
+		 *
+		 * Used for front buffering rendering where there is no
+		 * extenal frame marker. Each client tries to only keep
+		 * 20ms of work submitted, though that measurement is
+		 * flawed...
+		 *
+		 * This is used by Xory to try and maintain some resembalance
+		 * of input/output consistency when being feed a continuous
+		 * stream of X11 draw requests straight into scanout, where
+		 * the clients may submit the work faster than can be drawn.
+		 *
+		 * Throttling tracks requests per-file (and assumes that
+		 * all requests are in submission order across the whole file),
+		 * so we split each child to its own fd.
+		 */
+		{ "throttle",       F_THROTTLE | F_ISOLATE },
+		{ "throttle-vip",   F_THROTTLE | F_ISOLATE | F_VIP },
+		{ "throttle-solo",  F_THROTTLE | F_ISOLATE | F_SOLO },
+		{ "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+		{ "throttle-rrul",  F_THROTTLE | F_ISOLATE | F_RRUL },
+
+		/*
+		 * pace - mesa "submit double buffering"
+		 *
+		 * Submit a frame, wait for previous frame to start. This
+		 * prevents each client from getting too far ahead of its
+		 * rendering, maintaining a consistent input/output latency.
+		 */
+		{ "pace",       F_PACE },
+		{ "pace-solo",  F_PACE | F_SOLO},
+		{ "pace-share", F_PACE | F_SHARE},
+		{ "pace-ping",  F_PACE | F_SHARE | F_PING},
+
+		/* sync - only submit a frame at a time */
+		{ "sync",      F_SYNC },
+		{ "sync-vip",  F_SYNC | F_VIP },
+		{ "sync-solo", F_SYNC | F_SOLO },
+
+		/* flow - synchronise execution against the clock (vblank) */
+		{ "flow",       F_PACE | F_FLOW },
+		{ "flow-share", F_PACE | F_FLOW | F_SHARE },
+		{ "flow-ping",  F_PACE | F_FLOW | F_SHARE | F_PING },
+
+		/* next - submit ahead of the clock (vblank double buffering) */
+		{ "next",       F_PACE | F_FLOW | F_NEXT },
+		{ "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+		{ "next-ping",  F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+		/* spare - underutilise by a single client timeslice */
+		{ "spare", F_PACE | F_FLOW | F_SPARE },
+
+		/* half - run at half pace (submit 16ms of work every 32ms) */
+		{ "half",  F_PACE | F_FLOW | F_HALF },
+
+		{}
+	};
+
+	for (typeof(*fair) *f = fair; f->name; f++) {
+		igt_subtest_with_dynamic_f("fair-%s", f->name)  {
+			const struct intel_execution_engine2 *e;
+
+			igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+			__for_each_physical_engine(i915, e) {
+				if (!gem_class_can_store_dword(i915, e->class))
+					continue;
+
+				igt_dynamic_f("%s", e->name)
+					fairness(i915, e, timeout, f->flags);
+			}
+		}
+	}
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+				   uint32_t ctx,
+				   const struct intel_execution_engine2 *e)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+#define RUNTIME (base + 0x3a8)
+	uint32_t *map, *cs;
+	uint32_t ts;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, obj.handle,
+					     0, 4096, PROT_WRITE);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = RUNTIME;
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj.handle;
+	reloc.presumed_offset = obj.offset;
+	reloc.offset = offset_in_page(cs);
+	reloc.delta = 4000;
+	*cs++ = obj.offset + 4000;
+	*cs++ = obj.offset >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	ts = map[1000];
+	munmap(map, 4096);
+
+	return ts;
+}
+
+static void fairslice(int i915, const struct intel_execution_engine2 *e)
+{
+	igt_spin_t *spin[3];
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = gem_context_clone_with_engines(i915, 0);
+		spin[i] = igt_spin_new(i915, .ctx = ctx[i], .engine = e->flags);
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_assert(gem_bo_busy(i915, spin[i]->handle));
+		igt_spin_end(spin[i]);
+
+		ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_spin_free(i915, spin[i]);
+		gem_context_destroy(i915, ctx[i]);
+	}
+
+	qsort(ts, 3, sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert(ts[0] && ts[2] > ts[0]);
+	igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2569,6 +3364,25 @@ igt_main
 		test_each_engine("lateslice", fd, e)
 			lateslice(fd, e->flags);
 
+		igt_subtest_group {
+			igt_fixture {
+				igt_require(gem_scheduler_has_semaphores(fd));
+				igt_require(gem_scheduler_has_preemption(fd));
+				igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+			}
+
+			test_each_engine("fairslice", fd, e)
+				fairslice(fd, e);
+
+			igt_subtest("fairslice-all")  {
+				__for_each_physical_engine(fd, e) {
+					igt_fork(child, 1)
+						fairslice(fd, e);
+				}
+				igt_waitchildren();
+			}
+		}
+
 		test_each_engine("submit-early-slice", fd, e)
 			submit_slice(fd, e, EARLY_SUBMIT);
 		test_each_engine("submit-golden-slice", fd, e)
@@ -2597,6 +3411,8 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_fairness(fd, 2);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.28.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-22 19:08 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-22 19:08 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 782 +++++++++++++++++++++++++++++++++
 1 file changed, 782 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 931b1245f..fae04536c 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
 #include <sys/poll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <sched.h>
 #include <signal.h>
@@ -2501,6 +2502,766 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	/* Loop until CTX_TIMESTAMP - initial > @ns */
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	/* delta = now - start; inverted to match COND_BBE */
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	/* Save delta for reading by COND_BBE */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	/* Break if delta > ns */
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Otherwise back to recalculating delta */
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { INC, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	/* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	/* Load the address + inc & mask variables */
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(INC) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	/* Increment the [ring] address for saving CS_TIMESTAMP */
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	/* Rewrite the batch buffer for the next execution */
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	count = rand() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeline,
+		       uint32_t common,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC		(1 << 0)
+#define F_PACE		(1 << 1)
+#define F_FLOW		(1 << 2)
+#define F_HALF		(1 << 3)
+#define F_SOLO		(1 << 4)
+#define F_SPARE		(1 << 5)
+#define F_NEXT		(1 << 6)
+#define F_VIP		(1 << 7)
+#define F_RRUL		(1 << 8)
+#define F_SHARE		(1 << 9)
+#define F_PING		(1 << 10)
+#define F_THROTTLE	(1 << 11)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 obj[4] = {
+		{},
+		{
+			.handle = common ?: gem_create(i915, 4096),
+		},
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+	};
+	struct intel_execution_engine2 ping = *e;
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	int n;
+
+	srandom(getpid());
+	if (flags & F_PING)
+		ping = pick_random_engine(i915, e);
+	obj[0] = tslog_create(i915, ctx, &ping);
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 4,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			unsigned int seq;
+
+			seq = count;
+			if (flags & F_NEXT)
+				seq++;
+
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, seq);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+		close(execbuf.rsvd2);
+
+		execbuf.buffer_count = 1;
+		execbuf.batch_start_offset = 2048;
+		execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+		execbuf.rsvd2 = n_fence;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		if (flags & F_THROTTLE)
+			igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+		igt_swap(obj[2], obj[3]);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, obj[3].handle);
+	gem_close(i915, obj[2].handle);
+	if (obj[1].handle != common)
+		gem_close(i915, obj[1].handle);
+
+	gem_sync(i915, obj[0].handle);
+	if (out) {
+		uint32_t *map;
+
+		map = gem_mmap__device_coherent(i915, obj[0].handle,
+						0, 4096, PROT_WRITE);
+		for (n = 1; n < min(count, 512); n++) {
+			igt_assert(map[n]);
+			map[n - 1] = map[n] - map[n - 1];
+		}
+		qsort(map, --n, sizeof(*map), cmp_u32);
+		*out = ticks_to_ns(i915, map[n / 2]);
+		munmap(map, 4096);
+	}
+	gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t cpu_time = 0;
+
+	cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec tv = { .tv_nsec = delay_ns };
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+	uint32_t common = 0;
+
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		unsigned long vip = -1;
+		struct timespec tv;
+		struct igt_mean m;
+
+		if (flags & F_PING) {
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/* Are we running out of CPU time, and fail to submit frames? */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		if (10 * cpu_time > 9 * d_time) {
+			if (nchild > 7)
+				break;
+
+			igt_skip_on_f(10 * cpu_time > 9 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		if (flags & (F_VIP | F_RRUL))
+			vip = result[0];
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%2d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		if (vip != -1) {
+			igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+			igt_assert(4 * vip > 3 * fence_ns &&
+				   3 * vip < 4 * fence_ns);
+		}
+
+		/* May be slowed due to sheer volume of context switches */
+		igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+			       igt_mean_get(&m) < 3 * fence_ns);
+
+		igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			   3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+		igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} fair[] = {
+		/*
+		 * none - maximal greed in each client
+		 *
+		 * Push as many frames from each client as fast as possible
+		 */
+		{ "none",       0 },
+		{ "none-vip",   F_VIP }, /* one vip client must meet deadlines */
+		{ "none-solo",  F_SOLO }, /* 1 batch per frame per client */
+		{ "none-share", F_SHARE }, /* read from a common buffer */
+		{ "none-rrul",  F_RRUL }, /* "realtime-response under load" */
+		{ "none-ping",  F_PING }, /* measure inter-engine fairness */
+
+		/*
+		 * throttle - original per client throttling
+		 *
+		 * Used for front buffering rendering where there is no
+		 * extenal frame marker. Each client tries to only keep
+		 * 20ms of work submitted, though that measurement is
+		 * flawed...
+		 *
+		 * This is used by Xory to try and maintain some resembalance
+		 * of input/output consistency when being feed a continuous
+		 * stream of X11 draw requests straight into scanout, where
+		 * the clients may submit the work faster than can be drawn.
+		 */
+		{ "throttle",       F_THROTTLE },
+		{ "throttle-vip",   F_THROTTLE | F_VIP },
+		{ "throttle-solo",  F_THROTTLE | F_SOLO },
+		{ "throttle-share", F_THROTTLE | F_SHARE },
+		{ "throttle-rrul",  F_THROTTLE | F_RRUL },
+
+		/*
+		 * pace - mesa "submit double buffering"
+		 *
+		 * Submit a frame, wait for previous frame to start. This
+		 * prevents each client from getting too far ahead of its
+		 * rendering, maintaining a consistent input/output latency.
+		 */
+		{ "pace",       F_PACE },
+		{ "pace-solo",  F_PACE | F_SOLO},
+		{ "pace-share", F_PACE | F_SHARE},
+		{ "pace-ping",  F_PACE | F_SHARE | F_PING},
+
+		/* sync - only submit a frame at a time */
+		{ "sync",      F_SYNC },
+		{ "sync-vip",  F_SYNC | F_VIP },
+		{ "sync-solo", F_SYNC | F_SOLO },
+
+		/* flow - synchronise execution against the clock (vblank) */
+		{ "flow",       F_PACE | F_FLOW },
+		{ "flow-share", F_PACE | F_FLOW | F_SHARE },
+		{ "flow-ping",  F_PACE | F_FLOW | F_SHARE | F_PING },
+
+		/* next - submit ahead of the clock (vblank double buffering) */
+		{ "next",       F_PACE | F_FLOW | F_NEXT },
+		{ "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+		{ "next-ping",  F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+		/* spare - underutilise by a single client timeslice */
+		{ "spare", F_PACE | F_FLOW | F_SPARE },
+
+		/* half - run at half pace (submit 16ms of work every 32ms) */
+		{ "half",  F_PACE | F_FLOW | F_HALF },
+
+		{}
+	};
+
+	for (typeof(*fair) *f = fair; f->name; f++) {
+		igt_subtest_with_dynamic_f("fair-%s", f->name)  {
+			const struct intel_execution_engine2 *e;
+
+			igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+			__for_each_physical_engine(i915, e) {
+				if (!gem_class_can_store_dword(i915, e->class))
+					continue;
+
+				igt_dynamic_f("%s", e->name)
+					fairness(i915, e, timeout, f->flags);
+			}
+		}
+	}
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+				   uint32_t ctx,
+				   const struct intel_execution_engine2 *e)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+#define RUNTIME (base + 0x3a8)
+	uint32_t *map, *cs;
+	uint32_t ts;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, obj.handle,
+					     0, 4096, PROT_WRITE);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = RUNTIME;
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj.handle;
+	reloc.presumed_offset = obj.offset;
+	reloc.offset = offset_in_page(cs);
+	reloc.delta = 4000;
+	*cs++ = obj.offset + 4000;
+	*cs++ = obj.offset >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	ts = map[1000];
+	munmap(map, 4096);
+
+	return ts;
+}
+
+static void fairslice(int i915, const struct intel_execution_engine2 *e)
+{
+	igt_spin_t *spin[3];
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = gem_context_clone_with_engines(i915, 0);
+		spin[i] = igt_spin_new(i915, .ctx = ctx[i], .engine = e->flags);
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_assert(gem_bo_busy(i915, spin[i]->handle));
+		igt_spin_end(spin[i]);
+
+		ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_spin_free(i915, spin[i]);
+		gem_context_destroy(i915, ctx[i]);
+	}
+
+	qsort(ts, 3, sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert(ts[0] && ts[2] > ts[0]);
+	igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2567,6 +3328,25 @@ igt_main
 		test_each_engine("lateslice", fd, e)
 			lateslice(fd, e->flags);
 
+		igt_subtest_group {
+			igt_fixture {
+				igt_require(gem_scheduler_has_semaphores(fd));
+				igt_require(gem_scheduler_has_preemption(fd));
+				igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+			}
+
+			test_each_engine("fairslice", fd, e)
+				fairslice(fd, e);
+
+			igt_subtest("fairslice-all")  {
+				__for_each_physical_engine(fd, e) {
+					igt_fork(child, 1)
+						fairslice(fd, e);
+				}
+				igt_waitchildren();
+			}
+		}
+
 		test_each_engine("submit-early-slice", fd, e)
 			submit_slice(fd, e, EARLY_SUBMIT);
 		test_each_engine("submit-golden-slice", fd, e)
@@ -2595,6 +3375,8 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_fairness(fd, 2);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-09 12:45 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-09 12:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 699 +++++++++++++++++++++++++++++++++
 1 file changed, 699 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..b3a1fedaa 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
 #include <sys/poll.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <sched.h>
 #include <signal.h>
@@ -2495,6 +2496,666 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(START_TS);
+
+	while (offset_in_page(cs) & 63)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = RUNTIME;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_STORE_DWORD_IMM;
+		if (use_64b) {
+			*cs++ = addr + 4064;
+			*cs++ = addr >> 32;
+		} else {
+			*cs++ = 0;
+			*cs++ = addr + 4064;
+		}
+		*cs++ = 0;
+	}
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { ONE, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	count = rand() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeline,
+		       uint32_t common,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC  (1 << 0)
+#define F_PACE  (1 << 1)
+#define F_FLOW  (1 << 2)
+#define F_HALF  (1 << 3)
+#define F_SOLO  (1 << 4)
+#define F_SPARE (1 << 5)
+#define F_NEXT  (1 << 6)
+#define F_VIP   (1 << 7)
+#define F_RRUL  (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING  (1 << 10)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 obj[4] = {
+		{},
+		{
+			.handle = common ?: gem_create(i915, 4096),
+		},
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+	};
+	struct intel_execution_engine2 ping = *e;
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	int n;
+
+	srandom(getpid());
+	if (flags & F_PING)
+		ping = pick_random_engine(i915, e);
+	obj[0] = tslog_create(i915, ctx, &ping);
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(obj),
+			.buffer_count = 4,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			unsigned int seq;
+
+			seq = count;
+			if (flags & F_NEXT)
+				seq++;
+
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, seq);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+		close(execbuf.rsvd2);
+
+		execbuf.buffer_count = 1;
+		execbuf.batch_start_offset = 2048;
+		execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+		execbuf.rsvd2 = n_fence;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		igt_swap(obj[2], obj[3]);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, obj[3].handle);
+	gem_close(i915, obj[2].handle);
+	if (obj[1].handle != common)
+		gem_close(i915, obj[1].handle);
+
+	gem_sync(i915, obj[0].handle);
+	if (out) {
+		uint32_t *map;
+
+		map = gem_mmap__device_coherent(i915, obj[0].handle,
+						0, 4096, PROT_WRITE);
+		for (n = 1; n < min(count, 512); n++) {
+			igt_assert(map[n]);
+			map[n - 1] = map[n] - map[n - 1];
+		}
+		qsort(map, --n, sizeof(*map), cmp_u32);
+		*out = ticks_to_ns(i915, map[n / 2]);
+		munmap(map, 4096);
+	}
+	gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t cpu_time = 0;
+
+	cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec tv = { .tv_nsec = delay_ns };
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+	uint32_t common = 0;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		unsigned long vip = -1;
+		struct timespec tv;
+		struct igt_mean m;
+
+		if (flags & F_PING) {
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		if (10 * cpu_time > 9 * d_time) {
+			if (nchild > 7)
+				break;
+
+			igt_skip_on_f(10 * cpu_time > 9 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100.* cpu_time / d_time);
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		if (flags & (F_VIP | F_RRUL))
+			vip = result[0];
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		if (vip != -1) {
+			igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+			igt_assert(4 * vip > 3 * fence_ns &&
+				   3 * vip < 4 * fence_ns);
+		}
+
+		/* May be slowed due to sheer volume of context switches */
+		igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+			       igt_mean_get(&m) < 3 * fence_ns);
+
+		igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			   3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+		igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+				   uint32_t ctx,
+				   const struct intel_execution_engine2 *e)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+#define RUNTIME (base + 0x3a8)
+	uint32_t *map, *cs;
+	uint32_t ts;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, obj.handle,
+					     0, 4096, PROT_WRITE);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = RUNTIME;
+	memset(&reloc, 0, sizeof(reloc));
+	reloc.target_handle = obj.handle;
+	reloc.presumed_offset = obj.offset;
+	reloc.offset = offset_in_page(cs);
+	reloc.delta = 4000;
+	*cs++ = obj.offset + 4000;
+	*cs++ = obj.offset >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	ts = map[1000];
+	munmap(map, 4096);
+
+	return ts;
+}
+
+static void fairslice(int i915, const struct intel_execution_engine2 *e)
+{
+	igt_spin_t *spin[3];
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	igt_require(gem_scheduler_has_semaphores(i915));
+	igt_require(gem_scheduler_has_preemption(i915));
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = gem_context_clone_with_engines(i915, 0);
+		spin[i] = igt_spin_new(i915, .ctx = ctx[i], .engine = e->flags);
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_assert(gem_bo_busy(i915, spin[i]->handle));
+		igt_spin_end(spin[i]);
+
+		ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_spin_free(i915, spin[i]);
+		gem_context_destroy(i915, ctx[i]);
+	}
+
+	qsort(ts, 3, sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert(ts[0] && ts[2] > ts[0]);
+	igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2561,6 +3222,9 @@ igt_main
 		test_each_engine("lateslice", fd, e)
 			lateslice(fd, e->flags);
 
+		test_each_engine("fairslice", fd, e)
+			fairslice(fd, e);
+
 		test_each_engine("submit-early-slice", fd, e)
 			submit_slice(fd, e, EARLY_SUBMIT);
 		test_each_engine("submit-golden-slice", fd, e)
@@ -2589,6 +3253,41 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fair-none", fd, e)
+			fairness(fd, e, 2, 0);
+		test_each_engine_store("fair-none-vip", fd, e)
+			fairness(fd, e, 2, F_VIP);
+		test_each_engine_store("fair-none-share", fd, e)
+			fairness(fd, e, 2, F_SHARE);
+		test_each_engine_store("fair-none-rrul", fd, e)
+			fairness(fd, e, 2, F_RRUL);
+		test_each_engine_store("fair-none-ping", fd, e)
+			fairness(fd, e, 2, F_PING);
+		test_each_engine_store("fair-pace", fd, e)
+			fairness(fd, e, 2, F_PACE);
+		test_each_engine_store("fair-pace-share", fd, e)
+			fairness(fd, e, 2, F_PACE | F_SHARE);
+		test_each_engine_store("fair-pace-ping", fd, e)
+			fairness(fd, e, 2, F_PACE | F_SHARE | F_PING);
+		test_each_engine_store("fair-sync", fd, e)
+			fairness(fd, e, 2, F_SYNC);
+		test_each_engine_store("fair-sync-vip", fd, e)
+			fairness(fd, e, 2, F_SYNC | F_VIP);
+		test_each_engine_store("fair-solo", fd, e)
+			fairness(fd, e, 2, F_SYNC | F_SOLO);
+		test_each_engine_store("fair-flow", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW);
+		test_each_engine_store("fair-flow-ping", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_PING);
+		test_each_engine_store("fair-next", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_NEXT);
+		test_each_engine_store("fair-next-share", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_NEXT | F_SHARE);
+		test_each_engine_store("fair-spare", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_SPARE);
+		test_each_engine_store("fair-half", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_HALF);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
  2020-06-02  8:22 Chris Wilson
  2020-06-02  8:32 ` Chris Wilson
@ 2020-06-02  8:50 ` Chris Wilson
  1 sibling, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-02  8:50 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 442 +++++++++++++++++++++++++++++++++
 1 file changed, 442 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..ced9ee571 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,433 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { ONE, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       int timeline,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC  (1 << 0)
+#define F_PACE  (1 << 1)
+#define F_FLOW  (1 << 2)
+#define F_HALF  (1 << 3)
+#define F_SOLO  (1 << 4)
+#define F_SPARE (1 << 8)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 prev =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	uint32_t *map;
+	int n;
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, count);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		execbuf.buffers_ptr = to_user_pointer(&ts);
+		execbuf.batch_start_offset = 2048;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+		close(execbuf.rsvd2);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+
+	gem_sync(i915, ts.handle);
+	map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+	for (n = 1; n < min(count, 512); n++) {
+		igt_assert(map[n]);
+		map[n - 1] = map[n] - map[n - 1];
+	}
+	qsort(map, --n, sizeof(*map), cmp_u32);
+	*out = ticks_to_ns(i915, map[n / 2]);
+	munmap(map, 4096);
+
+	gem_close(i915, ts.handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+	igt_require(e->class == I915_ENGINE_CLASS_RENDER || /* XXX excuse me? */
+		    intel_gen(intel_get_drm_devid(i915)) < 11);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeout, timeline, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--) {
+			struct timespec tv = { .tv_nsec = fence_ns };
+			nanosleep(&tv, NULL);
+			sw_sync_timeline_inc(timeline, 1);
+		}
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child])) {
+				struct timespec tv = { .tv_nsec = fence_ns };
+				nanosleep(&tv, NULL);
+				sw_sync_timeline_inc(timeline, 1);
+			}
+		}
+		igt_waitchildren();
+		close(timeline);
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+		/* Mean within 10% of target */
+		igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
+			   10 * igt_mean_get(&m) <  9 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
+#endif
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +3016,21 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fair-none", fd, e)
+			fairness(fd, e, 2, 0);
+		test_each_engine_store("fair-pace", fd, e)
+			fairness(fd, e, 2, F_PACE);
+		test_each_engine_store("fair-sync", fd, e)
+			fairness(fd, e, 2, F_SYNC);
+		test_each_engine_store("fair-solo", fd, e)
+			fairness(fd, e, 2, F_SYNC | F_SOLO);
+		test_each_engine_store("fair-flow", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW);
+		test_each_engine_store("fair-spare", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_SPARE);
+		test_each_engine_store("fair-half", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_HALF);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
  2020-06-02  8:22 Chris Wilson
@ 2020-06-02  8:32 ` Chris Wilson
  2020-06-02  8:50 ` Chris Wilson
  1 sibling, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-02  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 440 +++++++++++++++++++++++++++++++++
 1 file changed, 440 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..911379cad 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,431 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { ONE, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       int timeline,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC  (1 << 0)
+#define F_PACE  (1 << 1)
+#define F_FLOW  (1 << 2)
+#define F_HALF  (1 << 3)
+#define F_SOLO  (1 << 4)
+#define F_SPARE (1 << 8)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 prev =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	uint32_t *map;
+	int n;
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_FLOW) {
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, count);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		execbuf.buffers_ptr = to_user_pointer(&ts);
+		execbuf.batch_start_offset = 2048;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+		close(execbuf.rsvd2);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+
+	gem_sync(i915, ts.handle);
+	map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+	for (n = 1; n < min(count, 512); n++) {
+		igt_assert(map[n]);
+		map[n - 1] = map[n] - map[n - 1];
+	}
+	qsort(map, --n, sizeof(*map), cmp_u32);
+	*out = ticks_to_ns(i915, map[n / 2]);
+	munmap(map, 4096);
+
+	gem_close(i915, ts.handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeout, timeline, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--) {
+			struct timespec tv = { .tv_nsec = fence_ns };
+			nanosleep(&tv, NULL);
+			sw_sync_timeline_inc(timeline, 1);
+		}
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child])) {
+				struct timespec tv = { .tv_nsec = fence_ns };
+				nanosleep(&tv, NULL);
+				sw_sync_timeline_inc(timeline, 1);
+			}
+		}
+		igt_waitchildren();
+		close(timeline);
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+		/* Mean within 10% of target */
+		igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
+			   10 * igt_mean_get(&m) <  9 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
+#endif
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +3014,21 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fair-none", fd, e)
+			fairness(fd, e, 2, 0);
+		test_each_engine_store("fair-pace", fd, e)
+			fairness(fd, e, 2, F_PACE);
+		test_each_engine_store("fair-sync", fd, e)
+			fairness(fd, e, 2, F_SYNC);
+		test_each_engine_store("fair-solo", fd, e)
+			fairness(fd, e, 2, F_SYNC | F_SOLO);
+		test_each_engine_store("fair-flow", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW);
+		test_each_engine_store("fair-spare", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_SPARE);
+		test_each_engine_store("fair-half", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW | F_HALF);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-02  8:22 Chris Wilson
  2020-06-02  8:32 ` Chris Wilson
  2020-06-02  8:50 ` Chris Wilson
  0 siblings, 2 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-02  8:22 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 436 +++++++++++++++++++++++++++++++++
 1 file changed, 436 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..3045eeb62 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,429 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { ONE, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       int timeline,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+{
+	const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+	struct drm_i915_gem_exec_object2 prev =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+	int p_fence = -1, n_fence = -1;
+	unsigned long count = 0;
+	uint32_t *map;
+	int n;
+
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & (F_FLOW | F_HALF)) {
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, count);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		execbuf.buffers_ptr = to_user_pointer(&ts);
+		execbuf.batch_start_offset = 2048;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACE && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+		close(execbuf.rsvd2);
+
+		if (flags & F_SYNC) {
+			struct pollfd pfd = {
+				.fd = n_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+
+	gem_sync(i915, ts.handle);
+	map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+	for (n = 1; n < min(count, 512); n++) {
+		igt_assert(map[n]);
+		map[n - 1] = map[n] - map[n - 1];
+	}
+	qsort(map, --n, sizeof(*map), cmp_u32);
+	*out = ticks_to_ns(i915, map[n / 2]);
+	munmap(map, 4096);
+
+	gem_close(i915, ts.handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, timeline, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--) {
+			struct timespec tv = { .tv_nsec = fence_ns };
+			nanosleep(&tv, NULL);
+			sw_sync_timeline_inc(timeline, 1);
+		}
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child])) {
+				struct timespec tv = { .tv_nsec = fence_ns };
+				nanosleep(&tv, NULL);
+				sw_sync_timeline_inc(timeline, 1);
+			}
+		}
+		igt_waitchildren();
+		close(timeline);
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+		/* Mean within 10% of target */
+		igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
+			   10 * igt_mean_get(&m) <  9 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
+#endif
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +3012,19 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fair-none", fd, e)
+			fairness(fd, e, 2, 0);
+		test_each_engine_store("fair-pace", fd, e)
+			fairness(fd, e, 2, F_PACE);
+		test_each_engine_store("fair-sync", fd, e)
+			fairness(fd, e, 2, F_SYNC);
+		test_each_engine_store("fair-flow", fd, e)
+			fairness(fd, e, 2, F_PACE | F_FLOW);
+		test_each_engine_store("fair-half", fd, e)
+			fairness(fd, e, 2, F_PACE | F_HALF);
+		test_each_engine_store("fair-solo", fd, e)
+			fairness(fd, e, 2, F_SYNC | F_SOLO);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-02  0:26 Chris Wilson
  0 siblings, 0 replies; 22+ messages in thread
From: Chris Wilson @ 2020-06-02  0:26 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in client and asserts that they take an equal amount of
time.

Though we have never claimed to have a completely fair scheduler, that
is what is expected.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 tests/i915/gem_exec_schedule.c | 418 +++++++++++++++++++++++++++++++++
 1 file changed, 418 insertions(+)

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..d1121ecd2 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,417 @@ static void measure_semaphore_power(int i915)
 	rapl_close(&pkg);
 }
 
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+				  NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	return div64_u64_round_up(ticks * NSEC_PER_SEC,
+				  read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr,
+		  uint64_t ns)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	enum { START_TS, NOW_TS };
+	uint32_t *map, *cs, *jmp;
+
+	igt_require(base);
+
+	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
+
+	if (offset_in_page(cs) & 4)
+		*cs++ = 0;
+	jmp = cs;
+
+	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	*cs++ = ~ns_to_ticks(i915, ns);
+	*cs++ = addr + 4000;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	*cs++ = addr + offset_in_page(jmp);
+	*cs++ = addr >> 32;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+		  const struct intel_execution_engine2 *e,
+		  uint32_t handle,
+		  uint64_t addr)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+	enum { ONE, MASK, ADDR };
+	uint32_t *timestamp_lo, *addr_lo;
+	uint32_t *map, *cs;
+
+	igt_require(base);
+
+	map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+	cs = map + 512;
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_TIMESTAMP;
+	timestamp_lo = cs;
+	*cs++ = addr;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR);
+	addr_lo = cs;
+	*cs++ = addr;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ADDR) + 4;
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE);
+	*cs++ = 4;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(ONE) + 4;
+	*cs++ = 0;
+
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK);
+	*cs++ = 0xfffff7ff;
+	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = CS_GPR(MASK) + 4;
+	*cs++ = 0xffffffff;
+
+	*cs++ = MI_MATH(8);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_ADD;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+	*cs++ = MI_MATH_AND;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(timestamp_lo);
+	*cs++ = addr >> 32;
+	*cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cs++ = CS_GPR(ADDR);
+	*cs++ = addr + offset_in_page(addr_lo);
+	*cs++ = addr >> 32;
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+		       const struct intel_execution_engine2 *e,
+		       uint64_t frame_ns,
+		       int timeout,
+		       int timeline,
+		       unsigned int flags,
+		       unsigned long *ctl,
+		       unsigned long *out)
+#define F_PACING 0x1
+#define F_EXTERNAL 0x2
+{
+	const int batches_per_frame = 3;
+	struct drm_i915_gem_exec_object2 prev =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 next =
+		delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+	struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+	struct timespec tv = {};
+	unsigned long count = 0;
+	int p_fence = -1, n_fence = -1;
+	uint32_t *map;
+	int n;
+
+	igt_nsec_elapsed(&tv);
+	while (!READ_ONCE(*ctl)) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&next),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+			.rsvd2 = -1,
+			.flags = e->flags,
+		};
+
+		if (flags & F_EXTERNAL) {
+			execbuf.rsvd2 =
+				sw_sync_timeline_create_fence(timeline, count);
+			execbuf.flags |= I915_EXEC_FENCE_IN;
+		}
+
+		execbuf.flags |= I915_EXEC_FENCE_OUT;
+		gem_execbuf_wr(i915, &execbuf);
+		n_fence = execbuf.rsvd2 >> 32;
+		execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+		for (n = 1; n < batches_per_frame; n++)
+			gem_execbuf(i915, &execbuf);
+
+		execbuf.buffers_ptr = to_user_pointer(&ts);
+		execbuf.batch_start_offset = 2048;
+		gem_execbuf(i915, &execbuf);
+
+		if (flags & F_PACING && p_fence != -1) {
+			struct pollfd pfd = {
+				.fd = p_fence,
+				.events = POLLIN,
+			};
+			poll(&pfd, 1, -1);
+		}
+		close(p_fence);
+		close(execbuf.rsvd2);
+
+		igt_swap(prev, next);
+		igt_swap(p_fence, n_fence);
+		count++;
+	}
+	gem_sync(i915, prev.handle);
+	close(p_fence);
+
+	gem_close(i915, next.handle);
+	gem_close(i915, prev.handle);
+
+	map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+	for (n = 1; n < min(count, 512); n++)
+		map[n - 1] = map[n] - map[n - 1];
+	qsort(map, --n, sizeof(*map), cmp_u32);
+	*out = ticks_to_ns(i915, map[n / 2]);
+	munmap(map, 4096);
+
+	gem_close(i915, ts.handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int timeline = sw_sync_timeline_create();
+		int nframes = timeout * NSEC_PER_SEC / frame_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			fair_child(i915, ctx, e, frame_ns / nchild,
+				   timeout, timeline, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nframes--) {
+			struct timespec tv = { .tv_nsec = frame_ns };
+			nanosleep(&tv, NULL);
+			sw_sync_timeline_inc(timeline, 1);
+		}
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child])) {
+				struct timespec tv = { .tv_nsec = frame_ns };
+				nanosleep(&tv, NULL);
+				sw_sync_timeline_inc(timeline, 1);
+			}
+		}
+		igt_waitchildren();
+		close(timeline);
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0],  1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+		/* Mean within 10% of target */
+		igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
+			   10 * igt_mean_get(&m) <  9 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
+#endif
+	}
+
+	munmap(result, 4096);
+}
+
 #define test_each_engine(T, i915, e) \
 	igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
 		igt_dynamic_f("%s", e->name)
@@ -2589,6 +3000,13 @@ igt_main
 		test_each_engine_store("promotion", fd, e)
 			promotion(fd, e->flags);
 
+		test_each_engine_store("fair-none", fd, e)
+			fairness(fd, e, 2, 0);
+		test_each_engine_store("fair-pace", fd, e)
+			fairness(fd, e, 2, F_PACING);
+		test_each_engine_store("fair-sync", fd, e)
+			fairness(fd, e, 2, F_PACING | F_EXTERNAL);
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(gem_scheduler_has_preemption(fd));
-- 
2.27.0.rc2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2020-12-16 15:25 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-01 19:08 [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-06-01 19:08 ` [igt-dev] " Chris Wilson
2020-06-01 19:37 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2020-06-01 19:53 ` [Intel-gfx] [PATCH i-g-t] " Chris Wilson
2020-06-01 19:53   ` [igt-dev] " Chris Wilson
2020-06-01 20:24 ` [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev2) Patchwork
2020-06-01 21:17 ` [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-06-01 21:17   ` [igt-dev] " Chris Wilson
2020-06-01 22:02 ` [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev3) Patchwork
2020-06-02  2:24 ` [igt-dev] ✓ Fi.CI.IGT: success for i915/gem_exec_schedule: Try to spot unfairness Patchwork
2020-06-02  3:59 ` [igt-dev] ✗ Fi.CI.IGT: failure for i915/gem_exec_schedule: Try to spot unfairness (rev2) Patchwork
2020-06-02  6:11 ` [igt-dev] ✗ Fi.CI.IGT: failure for i915/gem_exec_schedule: Try to spot unfairness (rev3) Patchwork
2020-06-02  0:26 [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-06-02  8:22 Chris Wilson
2020-06-02  8:32 ` Chris Wilson
2020-06-02  8:50 ` Chris Wilson
2020-06-09 12:45 Chris Wilson
2020-06-22 19:08 Chris Wilson
2020-08-03 13:57 Chris Wilson
2020-11-24 23:39 Chris Wilson
2020-12-10  2:09 Chris Wilson
2020-12-16 15:24 Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.