* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-12-16 15:24 ` Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-12-16 15:24 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), does an equal amount
of work in each client, and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
v2: igt_assert_f and more commentary; exclude vip from client stats,
include range of frame intervals from each individual client
v3: Write down what the test actually does!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 797 +++++++++++++++++++++++++++++++++
1 file changed, 797 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index dd15b2ac7..8be5539aa 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2532,12 +2533,250 @@ static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
return (x + y - 1) / y;
}
+/*
+ * Convert a duration in nanoseconds into timestamp ticks, rounding up.
+ * Gen11 uses a fixed 12.5MHz rate instead of the reported frequency.
+ */
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	const int cs_hz = read_timestamp_frequency(i915);
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	/* icl!!! are you feeling alright? CTX vs CS */
+	const int ctx_hz = gen == 11 ? 12500000 : cs_hz;
+
+	return div64_u64_round_up(ns * ctx_hz, NSEC_PER_SEC);
+}
+
static uint64_t ticks_to_ns(int i915, uint64_t ticks)
{
return div64_u64_round_up(ticks * NSEC_PER_SEC,
read_timestamp_frequency(i915));
}
+/*
+ * Helpers for building MI_MATH (command streamer ALU) packets.
+ *
+ * MI_INSTR() places the MI opcode at bit 23 and ORs in the flags/length
+ * field. MI_MATH(x) is followed by x ALU instruction dwords and encodes
+ * (x) - 1 into that field.
+ *
+ * MI_MATH_INSTR() packs one ALU instruction: opcode at bit 20, first
+ * operand at bit 10, second operand in the low bits.
+ */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/* LOAD0/LOAD1 take a single operand; the unused second operand is zero */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+/* Copy one register to another; emitted as source then destination */
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/*
+ * Write a GPU busy-wait loop into the first page of @handle.
+ *
+ * The batch samples the RUNTIME register (engine mmio base + 0x3a8) once,
+ * then loops re-sampling it until the elapsed tick count exceeds
+ * ns_to_ctx_ticks(@ns), at which point MI_COND_BATCH_BUFFER_END ends it.
+ *
+ * @e: engine whose mmio base is used for the GPR and RUNTIME registers
+ * @handle: GEM object that receives the batch (mapped for 4096 bytes)
+ * @addr: GPU address of @handle; used for the loop jump target and for
+ *	scratch dwords at byte offsets 4000 and 4064 within the page
+ * @ns: requested delay in nanoseconds
+ *
+ * Skips (igt_require) if no mmio base is known for the engine.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ /* Loop until CTX_TIMESTAMP - initial > @ns */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* START_TS = RUNTIME, with the upper dword of the GPR cleared */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with zero dwords so the loop entry point is 64-byte aligned */
+ while (offset_in_page(cs) & 63)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* NOW_TS = RUNTIME, with the upper dword of the GPR cleared */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta [time elapsed] > ns */
+ /* The reference value is inverted too, matching the inverted delta */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ctx_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object holding a delay() loop of @target_ns for engine @e.
+ *
+ * The object from batch_create() is first submitted once on @ctx and
+ * waited upon, and only then filled in by delay() using obj.offset.
+ * EXEC_OBJECT_PINNED is set on the returned object; the batch written by
+ * delay() embeds absolute addresses derived from that offset.
+ *
+ * Returns the exec object by value; the caller owns obj.handle.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * One page-sized slot per handle. Widen before shifting: the handle
+	 * is 32 bits, so an unwidened shift would wrap for handles >= 1 << 20.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+/*
+ * Write a self-advancing timestamp logger into the second half of @handle.
+ *
+ * The batch starts at dword 512 (byte 2048) of the page. Each execution
+ * stores CS_TIMESTAMP (engine mmio base + 0x358) to the current journal
+ * slot, computes the next slot address as (address + 4) & mask, and then
+ * stores that address back over its own address immediates so the next
+ * execution logs to the next slot. The mask clears bit 11, so the slot
+ * address wraps within the first 2048 bytes of the page.
+ *
+ * @e: engine whose mmio base is used for the GPR and timestamp registers
+ * @handle: GEM object that receives the batch (mapped for 4096 bytes)
+ * @addr: GPU address of @handle, also the address of the first slot
+ *
+ * Skips (igt_require) if no mmio base is known for the engine.
+ */
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { INC, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs; /* patched by the batch itself, see below */
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* Load the address + inc & mask variables */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs; /* patched by the batch itself, see below */
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* Increment the [ring] address for saving CS_TIMESTAMP */
+ /* ADDR = (ADDR + INC) & MASK */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Rewrite the batch buffer for the next execution */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object holding the tslog() timestamp logger for engine @e.
+ *
+ * The object from batch_create() is first submitted once on @ctx and
+ * waited upon, and only then filled in by tslog() using obj.offset.
+ * EXEC_OBJECT_PINNED is set on the returned object; the batch written by
+ * tslog() embeds absolute addresses derived from that offset.
+ *
+ * Returns the exec object by value; the caller owns obj.handle.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * One page-sized slot per handle. Widen before shifting: the handle
+	 * is 32 bits, so an unwidened shift would wrap for handles >= 1 << 20.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
static int cmp_u32(const void *A, const void *B)
{
const uint32_t *a = A, *b = B;
@@ -2550,6 +2789,560 @@ static int cmp_u32(const void *A, const void *B)
return 0;
}
+/*
+ * Returns false only for video engines on gen8, true for every other
+ * engine/generation combination.
+ */
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const bool is_gen8 = intel_gen(intel_get_drm_devid(i915)) == 8;
+	const bool is_video = e->class == I915_ENGINE_CLASS_VIDEO;
+
+	/* looks fubar */
+	return !(is_gen8 && is_video);
+}
+
+/*
+ * Pick, at random, a physical engine other than @not whose class supports
+ * mutable submission. Returns a copy of *@not when no such engine exists.
+ *
+ * Draws from random() so that the srandom() seeding done by fair_child()
+ * applies; rand() is only guaranteed to be seeded by srand().
+ */
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	/* First pass: count the candidates */
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	/* Second pass: stop on the randomly chosen candidate */
+	count = random() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+/*
+ * Body of one client: repeatedly submit "frames" of GPU delay work on @e
+ * until *@ctl becomes non-zero, logging a CS_TIMESTAMP after each frame.
+ *
+ * @ctx: context used for every submission
+ * @frame_ns: delay per frame, split over 1 (F_SOLO) or 3 delay batches
+ * @timeline: sw_sync timeline used to create in-fences when F_FLOW is set
+ * @common: handle used as obj[1]; if 0 a private 4096 byte buffer is made
+ * @flags: F_* bits; those read here are F_SOLO, F_PING, F_FLOW, F_NEXT,
+ *	F_PACE, F_SYNC and F_THROTTLE
+ * @ctl: polled with READ_ONCE(); the loop exits once it is non-zero
+ * @median, @iqr: when @median is not NULL, receive the median interval
+ *	between logged timestamps and the range between the quartiles, in ns
+ */
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeline,
+ uint32_t common,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *median,
+ unsigned long *iqr)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+#define F_SPARE (1 << 5)
+#define F_NEXT (1 << 6)
+#define F_VIP (1 << 7)
+#define F_RRUL (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING (1 << 10)
+#define F_THROTTLE (1 << 11)
+#define F_ISOLATE (1 << 12)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ /* [0] timestamp log (filled in below), [1] shared/private buffer, [2..3] delay batches */
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {},
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ int n;
+
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 3,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ /* F_FLOW: gate this frame on the timeline reaching seq */
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ /* First batch of the frame returns an out-fence in the upper half of rsvd2 */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+ /* Truncates to the lower half of rsvd2, i.e. the in-fence (or -1) */
+ close(execbuf.rsvd2);
+
+ /* Run the timestamp logger (obj[0] from byte 2048) after the frame's out-fence */
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 2048;
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence;
+ gem_execbuf(i915, &execbuf);
+
+ /* F_PACE: wait on the previous frame's fence */
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ /* F_SYNC: wait on this frame's fence */
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ if (flags & F_THROTTLE)
+ igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+ /* Alternate delay batches; this frame's fence becomes the previous one */
+ igt_swap(obj[2], obj[3]);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+
+ gem_sync(i915, obj[0].handle);
+ if (median) {
+ uint32_t *map;
+
+ /*
+ * We recorded the CS_TIMESTAMP of each frame, and if
+ * the GPU is being shared completely fairly, we expect
+ * each frame to be at the same interval from the last.
+ *
+ * Compute the interval between frames and report back
+ * both the median interval and the range for this client.
+ */
+
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_WRITE);
+ /*
+ * NOTE(review): once count exceeds 512 the journal has
+ * wrapped, so one of these differences presumably spans
+ * the wrap point -- confirm that outlier is acceptable.
+ */
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ /* n intervals now live in map[0..n-1] */
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ /*
+ * NOTE(review): (3 * n + 3) / 4 equals n for n < 4, which
+ * is one past the sorted intervals -- confirm that such
+ * small frame counts cannot occur here.
+ */
+ *iqr = ticks_to_ns(i915, map[(3 * n + 3) / 4] - map[n / 4]);
+ *median = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+/* qsort() comparator ordering unsigned long values ascending. */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	/* Yields -1, 0 or 1 without risking overflow from a subtraction */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Return the user + system CPU time consumed between two rusage
+ * snapshots (@a taken after @b), in nanoseconds.
+ */
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	const struct timeval *user_now = &a->ru_utime, *user_then = &b->ru_utime;
+	const struct timeval *sys_now = &a->ru_stime, *sys_then = &b->ru_stime;
+	uint64_t total;
+
+	/* Seconds scale to ns directly; microseconds are multiplied by 1000 */
+	total = (user_now->tv_sec - user_then->tv_sec) * NSEC_PER_SEC;
+	total += (user_now->tv_usec - user_then->tv_usec) * 1000;
+
+	total += (sys_now->tv_sec - sys_then->tv_sec) * NSEC_PER_SEC;
+	total += (sys_now->tv_usec - sys_then->tv_usec) * 1000;
+
+	return total;
+}
+
+/* Sleep for @delay_ns nanoseconds, then step @timeline forward by one. */
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec pause;
+
+	pause.tv_sec = 0;
+	pause.tv_nsec = delay_ns;
+	nanosleep(&pause, NULL);
+
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+/*
+ * Measure how evenly engine @e is shared between forked clients.
+ *
+ * For n = 2, 4, ... 256 this forks n - 1 clients running fair_child(),
+ * advances the sw_sync timeline for roughly @timeout seconds' worth of
+ * fences, then collects each client's median frame interval and its
+ * spread, and asserts on their distribution. @flags selects the
+ * submission style (F_*).
+ *
+ * Skips unless has_ctx_timestamp() and mutable submission hold for @e.
+ */
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result, *iqr;
+ uint32_t common = 0;
+
+ igt_require(has_ctx_timestamp(i915, e));
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ /* NOTE(review): 4095 looks like a typo for 4096 -- confirm */
+ if (flags & F_SHARE)
+ common = gem_create(i915, 4095);
+
+ /* Shared anonymous pages so that forked children can report back */
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(result != MAP_FAILED);
+ iqr = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(iqr != MAP_FAILED);
+
+ /*
+ * The combined workload always runs at a 60fps target (unless F_HALF!).
+ * This gives a frame interval of 16ms that is evenly split across
+ * all the clients, so simulating a system with a bunch of clients that
+ * are perfectly balanced and can sustain 60fps. Our job is to ensure
+ * that each client does run at a smooth 60fps.
+ *
+ * Each client runs a fixed length delay loop (as a single request,
+ * or split into 3) and then records the CS_TIMESTAMP after completing
+ * its delay. Given a fair allotment of GPU time to each client,
+ * that timestamp will [ideally] be at precise 16ms intervals.
+ * In practice, time is wasted on context switches, so as the number
+ * of clients increases, the proportion of time spent on context
+ * switches grows. As we get to 64 render clients, we will be spending
+ * as much time in context switches as executing the client workloads.
+ *
+ * Each client frame may be paced by some throttling technique found
+ * in the wild. i.e. each client may wait until a simulated vblank
+ * to indicate the start of a new frame, or it may wait until the
+ * completion of a previous frame. This causes submission from each
+ * client and across the system to be chunky and uneven.
+ *
+ * We look at the variation of frame intervals within each client, and
+ * the variation of the medians across the clients to see if the
+ * distribution (budget) of GPU time was fair enough.
+ *
+ * Alternative (and important) metrics will be more latency centric;
+ * looking at how well we can sustain meeting deadline given competition
+ * by clients for the GPU.
+ */
+
+ for (int n = 2; n <= 256; n <<= 1) { /* 32 == 500us per client */
+ int timeline = sw_sync_timeline_create();
+ int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+ int nchild = n - 1; /* odd for easy medians */
+ const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+ /*
+ * NOTE(review): lo/hi are fixed here, before nchild may be
+ * decremented to exclude the VIP below -- confirm that they
+ * still index within the reduced, sorted set (e.g. 3 clients).
+ */
+ const int lo = nchild / 4;
+ const int hi = (3 * nchild + 3) / 4 - 1;
+ struct rusage old_usage, usage;
+ uint64_t cpu_time, d_time;
+ struct timespec tv;
+ struct igt_mean m;
+
+ /* result[nchild] doubles as the stop flag polled by every child */
+ memset(result, 0, (nchild + 1) * sizeof(result[0]));
+
+ if (flags & F_PING) { /* fill the others with light bg load */
+ struct intel_execution_engine2 *ping;
+
+ __for_each_physical_engine(i915, ping) {
+ if (ping->flags == e->flags)
+ continue;
+
+ igt_fork(child, 1) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, ping,
+ child_ns / 8,
+ -1, common,
+ F_SOLO | F_PACE | F_SHARE,
+ &result[nchild],
+ NULL, NULL);
+
+ gem_context_destroy(i915, ctx);
+ }
+ }
+ }
+
+ getrusage(RUSAGE_CHILDREN, &old_usage);
+ igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+ igt_fork(child, nchild) {
+ uint32_t ctx;
+
+ /* F_ISOLATE: give the child its own fd, re-importing common via dmabuf */
+ if (flags & F_ISOLATE) {
+ int clone, dmabuf = -1;
+
+ if (common)
+ dmabuf = prime_handle_to_fd(i915, common);
+
+ clone = gem_reopen_driver(i915);
+ gem_context_copy_engines(i915, 0, clone, 0);
+ i915 = clone;
+
+ if (dmabuf != -1)
+ common = prime_fd_to_handle(i915, dmabuf);
+ }
+
+ ctx = gem_context_clone_with_engines(i915, 0);
+
+ /* Child 0 is the special client for F_VIP and F_RRUL */
+ if (flags & F_VIP && child == 0) {
+ gem_context_set_priority(i915, ctx, MAX_PRIO);
+ flags |= F_FLOW;
+ }
+ if (flags & F_RRUL && child == 0)
+ flags |= F_SOLO | F_FLOW | F_SYNC;
+
+ fair_child(i915, ctx, e, child_ns,
+ timeline, common, flags,
+ &result[nchild],
+ &result[child], &iqr[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ while (nfences--)
+ timeline_advance(timeline, fence_ns);
+
+ /* Tell the children to stop, but keep the timeline moving until each has reported */
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child]))
+ timeline_advance(timeline, fence_ns);
+ }
+
+ igt_waitchildren();
+ close(timeline);
+
+ /*
+ * Are we running out of CPU time, and fail to submit frames?
+ *
+ * We try to rule out any undue impact on the GPU scheduling
+ * from the CPU scheduler by looking for core saturation. If
+ * we may be in a situation where the clients + kernel are
+ * taking a whole core (think lockdep), then it is increasingly
+ * likely that our measurements include delays from the CPU
+ * scheduler. Err on the side of caution.
+ */
+ d_time = igt_nsec_elapsed(&tv);
+ getrusage(RUSAGE_CHILDREN, &usage);
+ cpu_time = d_cpu_time(&usage, &old_usage);
+ igt_debug("CPU usage: %.0f%%\n", 100. * cpu_time / d_time);
+ if (4 * cpu_time > 3 * d_time) {
+ if (nchild > 7) /* good enough to judge pass/fail */
+ break;
+
+ igt_skip_on_f(4 * cpu_time > 3 * d_time,
+ "%.0f%% CPU usage, presuming capacity exceeded\n",
+ 100. * cpu_time / d_time);
+ }
+
+ /* With no contention, we should match our target frametime */
+ if (nchild == 1) {
+ igt_assert(4 * result[0] > 3 * fence_ns &&
+ 3 * result[0] < 4 * fence_ns);
+ continue;
+ }
+
+ /*
+ * The VIP should always be able to hit the target frame rate;
+ * regardless of budget contention from lesser clients.
+ */
+ if (flags & (F_VIP | F_RRUL)) {
+ igt_info("VIP interval %.2fms, range %.2fms\n",
+ 1e-6 * result[0], 1e-6 * iqr[0]);
+ igt_assert_f(4 * result[0] > 3 * fence_ns &&
+ 3 * result[0] < 4 * fence_ns,
+ "VIP expects to run exactly when it wants, expects an interval of %.2fms, was %.2fms\n",
+ 1e-6 * fence_ns, 1e-6 * result[0]);
+ igt_assert_f(2 * iqr[0] < result[0],
+ "VIP frame IQR %.2fms exceeded median threshold %.2fms\n",
+ 1e-6 * iqr[0],
+ 1e-6 * result[0] / 2);
+ if (!--nchild)
+ continue;
+
+ /* Exclude the VIP result from the plebeian statistics */
+ memmove(result, result + 1, nchild * sizeof(*result));
+ memmove(iqr, iqr + 1, nchild * sizeof(*iqr));
+ }
+
+ /* Accumulate the mean before sorting; the order is irrelevant to it */
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ qsort(iqr, nchild, sizeof(*iqr), cmp_ul);
+
+ /*
+ * The target interval for median/mean is 16ms (fence_ns).
+ * However, this work is evenly split across the clients so
+ * the range (and median) of client medians may be much less
+ * than 16ms [16/3N]. We present median of medians to try
+ * and avoid any instability while running in CI; at the cost
+ * of insensitivity!
+ */
+ igt_info("%3d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f [%.1f, %.1f], mean: %.1f ± %.2f ms, cpu: %.0f%%\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * iqr[lo], 1e-6 * iqr[hi],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)),
+ 100. * cpu_time / d_time);
+
+ igt_assert_f(iqr[nchild / 2] < 2 * result[nchild / 2],
+ "Child frame IQR %.2fms exceeded median threshold %.2fms\n",
+ 1e-6 * iqr[nchild / 2],
+ 1e-6 * result[nchild / 2] * 2);
+
+ igt_assert_f(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+ 3 * igt_mean_get(&m) < 4 * result[nchild / 2],
+ "Mean of client interval %.2fms differs from median %.2fms, distribution is skewed\n",
+
+ 1e-6 * igt_mean_get(&m), 1e-6 * result[nchild / 2]);
+
+ igt_assert_f(result[nchild / 2] > frame_ns / 2,
+ "Median client interval %.2fms did not match target interval %.2fms\n",
+ 1e-6 * result[nchild / 2], 1e-6 * frame_ns);
+
+
+ igt_assert_f(result[hi] - result[lo] < result[nchild / 2],
+ "Interquartile range of client intervals %.2fms is as large as the median threshold %.2fms, clients are not evenly distributed!\n",
+ 1e-6 * (result[hi] - result[lo]),
+ 1e-6 * result[nchild / 2]);
+
+ /* May be slowed due to sheer volume of context switches */
+ if (result[0] > 2 * fence_ns)
+ break;
+ }
+
+ munmap(iqr, 4096);
+ munmap(result, 4096);
+ if (common)
+ gem_close(i915, common);
+}
+
+/*
+ * Register one "fair-<name>" subtest per entry of the table below, each
+ * with a dynamic subtest per physical engine that can store a dword,
+ * running fairness() for @timeout with that entry's F_* flags.
+ *
+ * The fixture requires gen8 or later.
+ */
+static void test_fairness(int i915, int timeout)
+{
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } fair[] = {
+ /*
+ * none - maximal greed in each client
+ *
+ * Push as many frames from each client as fast as possible
+ */
+ { "none", 0 },
+ { "none-vip", F_VIP }, /* one vip client must meet deadlines */
+ { "none-solo", F_SOLO }, /* 1 batch per frame per client */
+ { "none-share", F_SHARE }, /* read from a common buffer */
+ { "none-rrul", F_RRUL }, /* "realtime-response under load" */
+ { "none-ping", F_PING }, /* measure inter-engine fairness */
+
+ /*
+ * throttle - original per client throttling
+ *
+ * Used for front buffering rendering where there is no
+ * external frame marker. Each client tries to only keep
+ * 20ms of work submitted, though that measurement is
+ * flawed...
+ *
+ * This is used by Xorg to try and maintain some semblance
+ * of input/output consistency when being fed a continuous
+ * stream of X11 draw requests straight into scanout, where
+ * the clients may submit the work faster than can be drawn.
+ *
+ * Throttling tracks requests per-file (and assumes that
+ * all requests are in submission order across the whole file),
+ * so we split each child to its own fd.
+ */
+ { "throttle", F_THROTTLE | F_ISOLATE },
+ { "throttle-vip", F_THROTTLE | F_ISOLATE | F_VIP },
+ { "throttle-solo", F_THROTTLE | F_ISOLATE | F_SOLO },
+ { "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+ { "throttle-rrul", F_THROTTLE | F_ISOLATE | F_RRUL },
+
+ /*
+ * pace - mesa "submit double buffering"
+ *
+ * Submit a frame, wait for previous frame to start. This
+ * prevents each client from getting too far ahead of its
+ * rendering, maintaining a consistent input/output latency.
+ */
+ { "pace", F_PACE },
+ { "pace-solo", F_PACE | F_SOLO },
+ { "pace-share", F_PACE | F_SOLO | F_SHARE },
+ { "pace-ping", F_PACE | F_SOLO | F_SHARE | F_PING},
+
+ /* sync - only submit a frame at a time */
+ { "sync", F_SYNC },
+ { "sync-vip", F_SYNC | F_VIP },
+ { "sync-solo", F_SYNC | F_SOLO },
+
+ /* flow - synchronise execution against the clock (vblank) */
+ { "flow", F_PACE | F_FLOW },
+ { "flow-solo", F_PACE | F_FLOW | F_SOLO },
+ { "flow-share", F_PACE | F_FLOW | F_SHARE },
+ { "flow-ping", F_PACE | F_FLOW | F_SHARE | F_PING },
+
+ /* next - submit ahead of the clock (vblank double buffering) */
+ { "next", F_PACE | F_FLOW | F_NEXT },
+ { "next-solo", F_PACE | F_FLOW | F_NEXT | F_SOLO },
+ { "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+ { "next-ping", F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+ /* spare - underutilise by a single client timeslice */
+ { "spare", F_PACE | F_FLOW | F_SPARE },
+ { "spare-solo", F_PACE | F_FLOW | F_SPARE | F_SOLO },
+
+ /* half - run at half pace (submit 16ms of work every 32ms) */
+ { "half", F_PACE | F_FLOW | F_HALF },
+ { "half-solo", F_PACE | F_FLOW | F_HALF | F_SOLO },
+
+ {} /* sentinel: name == NULL terminates the walk below */
+ };
+
+ igt_fixture {
+ igt_info("CS timestamp frequency: %d\n",
+ read_timestamp_frequency(i915));
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ }
+
+ for (typeof(*fair) *f = fair; f->name; f++) {
+ igt_subtest_with_dynamic_f("fair-%s", f->name) {
+ const struct intel_execution_engine2 *e;
+
+ __for_each_physical_engine(i915, e) {
+ if (!gem_class_can_store_dword(i915, e->class))
+ continue;
+
+ igt_dynamic_f("%s", e->name)
+ fairness(i915, e, timeout, f->flags);
+ }
+ }
+ }
+}
+
static uint32_t read_ctx_timestamp(int i915,
uint32_t ctx,
const struct intel_execution_engine2 *e)
@@ -2789,6 +3582,10 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ igt_subtest_group {
+ test_fairness(fd, 2);
+ }
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.29.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [igt-dev] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-12-16 15:24 ` Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-12-16 15:24 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), does an equal amount
of work in each client, and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
v2: igt_assert_f and more commentary; exclude vip from client stats,
include range of frame intervals from each individual client
v3: Write down what the test actually does!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 797 +++++++++++++++++++++++++++++++++
1 file changed, 797 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index dd15b2ac7..8be5539aa 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2532,12 +2533,250 @@ static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
return (x + y - 1) / y;
}
+/*
+ * Convert a duration in nanoseconds into timestamp ticks, rounding up.
+ * Gen11 uses a fixed 12.5MHz rate instead of the reported frequency.
+ */
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	const int cs_hz = read_timestamp_frequency(i915);
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	/* icl!!! are you feeling alright? CTX vs CS */
+	const int ctx_hz = gen == 11 ? 12500000 : cs_hz;
+
+	return div64_u64_round_up(ns * ctx_hz, NSEC_PER_SEC);
+}
+
static uint64_t ticks_to_ns(int i915, uint64_t ticks)
{
return div64_u64_round_up(ticks * NSEC_PER_SEC,
read_timestamp_frequency(i915));
}
+/*
+ * Helpers for building MI_MATH (command streamer ALU) packets.
+ *
+ * MI_INSTR() places the MI opcode at bit 23 and ORs in the flags/length
+ * field. MI_MATH(x) is followed by x ALU instruction dwords and encodes
+ * (x) - 1 into that field.
+ *
+ * MI_MATH_INSTR() packs one ALU instruction: opcode at bit 20, first
+ * operand at bit 10, second operand in the low bits.
+ */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/* LOAD0/LOAD1 take a single operand; the unused second operand is zero */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+/* Copy one register to another; emitted as source then destination */
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/*
+ * Write a GPU busy-wait loop into the first page of @handle.
+ *
+ * The batch samples the RUNTIME register (engine mmio base + 0x3a8) once,
+ * then loops re-sampling it until the elapsed tick count exceeds
+ * ns_to_ctx_ticks(@ns), at which point MI_COND_BATCH_BUFFER_END ends it.
+ *
+ * @e: engine whose mmio base is used for the GPR and RUNTIME registers
+ * @handle: GEM object that receives the batch (mapped for 4096 bytes)
+ * @addr: GPU address of @handle; used for the loop jump target and for
+ *	scratch dwords at byte offsets 4000 and 4064 within the page
+ * @ns: requested delay in nanoseconds
+ *
+ * Skips (igt_require) if no mmio base is known for the engine.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ /* Loop until CTX_TIMESTAMP - initial > @ns */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* START_TS = RUNTIME, with the upper dword of the GPR cleared */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with zero dwords so the loop entry point is 64-byte aligned */
+ while (offset_in_page(cs) & 63)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* NOW_TS = RUNTIME, with the upper dword of the GPR cleared */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta [time elapsed] > ns */
+ /* The reference value is inverted too, matching the inverted delta */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ctx_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object holding a delay() loop of @target_ns for engine @e.
+ *
+ * The object from batch_create() is first submitted once on @ctx and
+ * waited upon, and only then filled in by delay() using obj.offset.
+ * EXEC_OBJECT_PINNED is set on the returned object; the batch written by
+ * delay() embeds absolute addresses derived from that offset.
+ *
+ * Returns the exec object by value; the caller owns obj.handle.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * One page-sized slot per handle. Widen before shifting: the handle
+	 * is 32 bits, so an unwidened shift would wrap for handles >= 1 << 20.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+/*
+ * Write a self-advancing timestamp logger into the second half of @handle.
+ *
+ * The batch starts at dword 512 (byte 2048) of the page. Each execution
+ * stores CS_TIMESTAMP (engine mmio base + 0x358) to the current journal
+ * slot, computes the next slot address as (address + 4) & mask, and then
+ * stores that address back over its own address immediates so the next
+ * execution logs to the next slot. The mask clears bit 11, so the slot
+ * address wraps within the first 2048 bytes of the page.
+ *
+ * @e: engine whose mmio base is used for the GPR and timestamp registers
+ * @handle: GEM object that receives the batch (mapped for 4096 bytes)
+ * @addr: GPU address of @handle, also the address of the first slot
+ *
+ * Skips (igt_require) if no mmio base is known for the engine.
+ */
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { INC, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs; /* patched by the batch itself, see below */
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* Load the address + inc & mask variables */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs; /* patched by the batch itself, see below */
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* Increment the [ring] address for saving CS_TIMESTAMP */
+ /* ADDR = (ADDR + INC) & MASK */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Rewrite the batch buffer for the next execution */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object holding the tslog() timestamp logger for engine @e.
+ *
+ * The object from batch_create() is first submitted once on @ctx and
+ * waited upon, and only then filled in by tslog() using obj.offset.
+ * EXEC_OBJECT_PINNED is set on the returned object; the batch written by
+ * tslog() embeds absolute addresses derived from that offset.
+ *
+ * Returns the exec object by value; the caller owns obj.handle.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * One page-sized slot per handle. Widen before shifting: the handle
+	 * is 32 bits, so an unwidened shift would wrap for handles >= 1 << 20.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
static int cmp_u32(const void *A, const void *B)
{
const uint32_t *a = A, *b = B;
@@ -2550,6 +2789,560 @@ static int cmp_u32(const void *A, const void *B)
return 0;
}
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+ /* The context timestamp is rejected only for video engines on gen8. */
+ const bool gen8 = intel_gen(intel_get_drm_devid(i915)) == 8;
+ const bool video = e->class == I915_ENGINE_CLASS_VIDEO;
+
+ /* Every other generation/engine-class pairing is accepted. */
+ return !(gen8 && video);
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+ const struct intel_execution_engine2 *e;
+ unsigned int count = 0;
+ /* Return a random engine with mutable submission whose flags differ from *not, else *not. */
+ __for_each_physical_engine(i915, e) { /* pass 1: count the candidates */
+ if (e->flags == not->flags)
+ continue;
+ if (!gem_class_has_mutable_submission(i915, e->class))
+ continue;
+ count++;
+ }
+ if (!count)
+ return *not; /* nothing else qualifies */
+ /* random(), not rand(): the only seeding in this file is srandom() in fair_child(). */
+ count = random() % count;
+ __for_each_physical_engine(i915, e) { /* pass 2: stop on the chosen candidate */
+ if (e->flags == not->flags)
+ continue;
+ if (!gem_class_has_mutable_submission(i915, e->class))
+ continue;
+ if (!count--)
+ break;
+ }
+
+ return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns, /* length of the delay workload submitted per frame */
+ int timeline, /* sw_sync timeline that gates frames under F_FLOW */
+ uint32_t common, /* buffer shared between clients; 0 creates a private one */
+ unsigned int flags, /* F_* bits below */
+ unsigned long *ctl, /* frames are submitted until *ctl reads non-zero */
+ unsigned long *median, /* out: median frame interval in ns; NULL skips the statistics */
+ unsigned long *iqr) /* out: interquartile range of the frame intervals in ns */
+#define F_SYNC (1 << 0) /* wait for this frame's fence before the next frame */
+#define F_PACE (1 << 1) /* wait for the previous frame's fence after submitting */
+#define F_FLOW (1 << 2) /* gate each frame on a sw_sync timeline fence */
+#define F_HALF (1 << 3) /* the timeline ticks every two frame periods */
+#define F_SOLO (1 << 4) /* one batch per frame instead of three */
+#define F_SPARE (1 << 5) /* size the workload as if there were one more client */
+#define F_NEXT (1 << 6) /* gate on the following timeline point */
+#define F_VIP (1 << 7) /* child 0 runs at MAX_PRIO with F_FLOW */
+#define F_RRUL (1 << 8) /* child 0 runs F_SOLO | F_FLOW | F_SYNC */
+#define F_SHARE (1 << 9) /* all clients reference one common buffer */
+#define F_PING (1 << 10) /* log timestamps on a different, random engine */
+#define F_THROTTLE (1 << 11) /* issue the GEM_THROTTLE ioctl after every frame */
+#define F_ISOLATE (1 << 12) /* each client uses its own reopened driver fd */
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {}, /* [0]: timestamp log, created below */
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame), /* [2] and [3] take turns as the batch */
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1; /* out-fences of the previous and the newest frame */
+ unsigned long count = 0; /* frames submitted so far */
+ int n;
+ /* Seed per process so that clients need not all pick the same ping engine. */
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+ /* Submit one frame per iteration until *ctl is raised. */
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 3,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+ /* F_FLOW: hold the frame back behind timeline point seq. */
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+ /* Only the first batch of the frame carries the in/out fences. */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32; /* out-fence is returned in the upper half */
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+ close(execbuf.rsvd2); /* int truncation leaves the in-fence fd, or -1 */
+ /* Run obj[0] from byte 2048 on the ping engine once n_fence signals. */
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 2048;
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence;
+ gem_execbuf(i915, &execbuf);
+ /* F_PACE: block until the previous frame's fence has signalled. */
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+ /* F_SYNC: block until this frame's fence has signalled as well. */
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ if (flags & F_THROTTLE)
+ igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+ /* Alternate the two delay batches; this frame's fence becomes the previous one. */
+ igt_swap(obj[2], obj[3]);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+ /* Release the workload buffers, but never a caller-provided common buffer. */
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+ /* Wait for outstanding work on the log before reading it back. */
+ gem_sync(i915, obj[0].handle);
+ if (median) {
+ uint32_t *map;
+
+ /*
+ * We recorded the CS_TIMESTAMP of each frame, and if
+ * the GPU is being shared completely fairly, we expect
+ * each frame to be at the same interval from the last.
+ * Convert the log to intervals in place, sort them, and
+ * report the median and interquartile range for this client.
+ * NOTE(review): with fewer than 4 intervals the upper
+ * quartile index equals n, one past the sorted entries.
+ */
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_READ | PROT_WRITE); /* the log is read as well as rewritten */
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ qsort(map, --n, sizeof(*map), cmp_u32); /* n is now the number of intervals */
+ *iqr = ticks_to_ns(i915, map[(3 * n + 3) / 4] - map[n / 4]);
+ *median = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+ /*
+ * qsort() comparator: orders unsigned long values ascending.
+ */
+ const unsigned long lhs = *(const unsigned long *)A;
+ const unsigned long rhs = *(const unsigned long *)B;
+
+ /* The difference of the two comparisons is -1, 0 or 1, with no wraparound. */
+ return (lhs > rhs) - (lhs < rhs);
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+ uint64_t cpu_time = 0; /* negative usec deltas wrap modulo 2^64 and cancel in the sum */
+ /* Widen each delta to 64 bits first: a native 32-bit long overflows after ~2s. */
+ cpu_time += (int64_t)(a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+ cpu_time += (int64_t)(a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+ /* Same again for the system (kernel) time. */
+ cpu_time += (int64_t)(a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+ cpu_time += (int64_t)(a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+ /* User + system CPU time, in nanoseconds, by which a exceeds b. */
+ return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+ /* Sleep for delay_ns (sub-second, as only tv_nsec is set), then bump the timeline by one. */
+ nanosleep(&(struct timespec){ .tv_nsec = delay_ns }, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result, *iqr;
+ uint32_t common = 0;
+ /* Fork growing numbers of clients on engine e and assert they share the GPU evenly. */
+ igt_require(has_ctx_timestamp(i915, e));
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ if (flags & F_SHARE)
+ common = gem_create(i915, 4096);
+ /* Shared with the forked children: per-child results, then the stop flag. Read back here. */
+ result = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(result != MAP_FAILED);
+ iqr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ igt_assert(iqr != MAP_FAILED);
+
+ /*
+ * The combined workload always runs at a 60fps target (unless F_HALF!).
+ * This gives a frame interval of 16ms that is evenly split across
+ * all the clients, so simulating a system with a bunch of clients that
+ * are perfectly balanced and can sustain 60fps. Our job is to ensure
+ * that each client does run at a smooth 60fps.
+ *
+ * Each client runs a fixed length delay loop (as a single request,
+ * or split into 3) and then records the CS_TIMESTAMP after completing
+ * its delay. Given a fair allotment of GPU time to each client,
+ * that timestamp will [ideally] be at precise 16ms intervals.
+ * In practice, time is wasted on context switches, so as the number
+ * of clients increases, the proportion of time spent on context
+ * switches grows. As we get to 64 render clients, we will be spending
+ * as much time in context switches as executing the client workloads.
+ *
+ * Each client frame may be paced by some throttling technique found
+ * in the wild. i.e. each client may wait until a simulated vblank
+ * to indicate the start of a new frame, or it may wait until the
+ * completion of a previous frame. This causes submission from each
+ * client and across the system to be chunky and uneven.
+ *
+ * We look at the variation of frame intervals within each client, and
+ * the variation of the medians across the clients to see if the
+ * distribution (budget) of GPU time was fair enough.
+ *
+ * Alternative (and important) metrics will be more latency centric;
+ * looking at how well we can sustain meeting deadlines given competition
+ * by clients for the GPU.
+ */
+
+ for (int n = 2; n <= 256; n <<= 1) { /* 32 == 500us per client */
+ int timeline = sw_sync_timeline_create();
+ int nfences = (int64_t)timeout * NSEC_PER_SEC / fence_ns + 1; /* 64-bit: the product may not fit an int */
+ int nchild = n - 1; /* odd for easy medians */
+ const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+ int lo = nchild / 4; /* quartile indices into the sorted results */
+ int hi = (3 * nchild + 3) / 4 - 1;
+ struct rusage old_usage, usage;
+ uint64_t cpu_time, d_time;
+ struct timespec tv;
+ struct igt_mean m;
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0])); /* also clears the stop flag at [nchild] */
+
+ if (flags & F_PING) { /* fill the others with light bg load */
+ const struct intel_execution_engine2 *ping;
+
+ __for_each_physical_engine(i915, ping) {
+ if (ping->flags == e->flags)
+ continue;
+
+ igt_fork(child, 1) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, ping,
+ child_ns / 8,
+ -1, common,
+ F_SOLO | F_PACE | F_SHARE,
+ &result[nchild],
+ NULL, NULL);
+
+ gem_context_destroy(i915, ctx);
+ }
+ }
+ }
+
+ getrusage(RUSAGE_CHILDREN, &old_usage);
+ igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+ igt_fork(child, nchild) {
+ uint32_t ctx;
+
+ if (flags & F_ISOLATE) {
+ int clone, dmabuf = -1;
+
+ if (common)
+ dmabuf = prime_handle_to_fd(i915, common);
+
+ clone = gem_reopen_driver(i915);
+ gem_context_copy_engines(i915, 0, clone, 0);
+ i915 = clone;
+
+ if (dmabuf != -1)
+ common = prime_fd_to_handle(i915, dmabuf);
+ }
+
+ ctx = gem_context_clone_with_engines(i915, 0);
+
+ if (flags & F_VIP && child == 0) {
+ gem_context_set_priority(i915, ctx, MAX_PRIO);
+ flags |= F_FLOW;
+ }
+ if (flags & F_RRUL && child == 0)
+ flags |= F_SOLO | F_FLOW | F_SYNC;
+
+ fair_child(i915, ctx, e, child_ns,
+ timeline, common, flags,
+ &result[nchild],
+ &result[child], &iqr[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+ /* Tick the timeline once per fence period for the duration of the test. */
+ while (nfences--)
+ timeline_advance(timeline, fence_ns);
+ /* Raise the stop flag, and keep ticking until every child has posted its median. */
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child]))
+ timeline_advance(timeline, fence_ns);
+ }
+
+ igt_waitchildren();
+ close(timeline);
+
+ /*
+ * Are we running out of CPU time, and fail to submit frames?
+ *
+ * We try to rule out any undue impact on the GPU scheduling
+ * from the CPU scheduler by looking for core saturation. If
+ * we may be in a situation where the clients + kernel are
+ * taking a whole core (think lockdep), then it is increasingly
+ * likely that our measurements include delays from the CPU
+ * scheduler. Err on the side of caution.
+ */
+ d_time = igt_nsec_elapsed(&tv);
+ getrusage(RUSAGE_CHILDREN, &usage);
+ cpu_time = d_cpu_time(&usage, &old_usage);
+ igt_debug("CPU usage: %.0f%%\n", 100. * cpu_time / d_time);
+ if (4 * cpu_time > 3 * d_time) {
+ if (nchild > 7) /* good enough to judge pass/fail */
+ break;
+
+ igt_skip_on_f(4 * cpu_time > 3 * d_time,
+ "%.0f%% CPU usage, presuming capacity exceeded\n",
+ 100. * cpu_time / d_time);
+ }
+
+ /* With no contention, we should match our target frametime */
+ if (nchild == 1) {
+ igt_assert(4 * result[0] > 3 * fence_ns &&
+ 3 * result[0] < 4 * fence_ns);
+ continue;
+ }
+
+ /*
+ * The VIP should always be able to hit the target frame rate;
+ * regardless of budget contention from lesser clients.
+ */
+ if (flags & (F_VIP | F_RRUL)) {
+ igt_info("VIP interval %.2fms, range %.2fms\n",
+ 1e-6 * result[0], 1e-6 * iqr[0]);
+ igt_assert_f(4 * result[0] > 3 * fence_ns &&
+ 3 * result[0] < 4 * fence_ns,
+ "VIP expects to run exactly when it wants, expects an interval of %.2fms, was %.2fms\n",
+ 1e-6 * fence_ns, 1e-6 * result[0]);
+ igt_assert_f(2 * iqr[0] < result[0],
+ "VIP frame IQR %.2fms exceeded median threshold %.2fms\n",
+ 1e-6 * iqr[0],
+ 1e-6 * result[0] / 2);
+ if (!--nchild)
+ continue;
+
+ /* Exclude the VIP result from the plebeian statistics */
+ memmove(result, result + 1, nchild * sizeof(*result));
+ memmove(iqr, iqr + 1, nchild * sizeof(*iqr));
+ lo = nchild / 4; /* the population shrank: re-derive the quartiles */
+ hi = (3 * nchild + 3) / 4 - 1;
+ }
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ qsort(iqr, nchild, sizeof(*iqr), cmp_ul);
+
+ /*
+ * The target interval for median/mean is 16ms (fence_ns).
+ * However, this work is evenly split across the clients so
+ * the range (and median) of client medians may be much less
+ * than 16ms [16/3N]. We present median of medians to try
+ * and avoid any instability while running in CI; at the cost
+ * of insensitivity!
+ */
+ igt_info("%3d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f [%.1f, %.1f], mean: %.1f ± %.2f ms, cpu: %.0f%%\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * iqr[lo], 1e-6 * iqr[hi],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)),
+ 100. * cpu_time / d_time);
+
+ igt_assert_f(iqr[nchild / 2] < 2 * result[nchild / 2],
+ "Child frame IQR %.2fms exceeded median threshold %.2fms\n",
+ 1e-6 * iqr[nchild / 2],
+ 1e-6 * result[nchild / 2] * 2);
+
+ igt_assert_f(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+ 3 * igt_mean_get(&m) < 4 * result[nchild / 2],
+ "Mean of client interval %.2fms differs from median %.2fms, distribution is skewed\n",
+ 1e-6 * igt_mean_get(&m), 1e-6 * result[nchild / 2]);
+
+ igt_assert_f(result[nchild / 2] > frame_ns / 2,
+ "Median client interval %.2fms did not match target interval %.2fms\n",
+ 1e-6 * result[nchild / 2], 1e-6 * frame_ns);
+
+ igt_assert_f(result[hi] - result[lo] < result[nchild / 2],
+ "Interquartile range of client intervals %.2fms is as large as the median threshold %.2fms, clients are not evenly distributed!\n",
+ 1e-6 * (result[hi] - result[lo]),
+ 1e-6 * result[nchild / 2]);
+
+ /* May be slowed due to sheer volume of context switches */
+ if (result[0] > 2 * fence_ns)
+ break;
+ }
+
+ munmap(iqr, 4096);
+ munmap(result, 4096);
+ if (common)
+ gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } fair[] = {
+ /*
+ * none - maximal greed in each client
+ *
+ * Push as many frames from each client as fast as possible
+ */
+ { "none", 0 },
+ { "none-vip", F_VIP }, /* one vip client must meet deadlines */
+ { "none-solo", F_SOLO }, /* 1 batch per frame per client */
+ { "none-share", F_SHARE }, /* read from a common buffer */
+ { "none-rrul", F_RRUL }, /* "realtime-response under load" */
+ { "none-ping", F_PING }, /* measure inter-engine fairness */
+
+ /*
+ * throttle - original per client throttling
+ *
+ * Used for front-buffer rendering where there is no
+ * external frame marker. Each client tries to only keep
+ * 20ms of work submitted, though that measurement is
+ * flawed...
+ *
+ * This is used by Xorg to try and maintain some semblance
+ * of input/output consistency when being fed a continuous
+ * stream of X11 draw requests straight into scanout, where
+ * the clients may submit the work faster than can be drawn.
+ *
+ * Throttling tracks requests per-file (and assumes that
+ * all requests are in submission order across the whole file),
+ * so we split each child to its own fd.
+ */
+ { "throttle", F_THROTTLE | F_ISOLATE },
+ { "throttle-vip", F_THROTTLE | F_ISOLATE | F_VIP },
+ { "throttle-solo", F_THROTTLE | F_ISOLATE | F_SOLO },
+ { "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+ { "throttle-rrul", F_THROTTLE | F_ISOLATE | F_RRUL },
+
+ /*
+ * pace - mesa "submit double buffering"
+ *
+ * Submit a frame, wait for previous frame to start. This
+ * prevents each client from getting too far ahead of its
+ * rendering, maintaining a consistent input/output latency.
+ */
+ { "pace", F_PACE },
+ { "pace-solo", F_PACE | F_SOLO },
+ { "pace-share", F_PACE | F_SOLO | F_SHARE },
+ { "pace-ping", F_PACE | F_SOLO | F_SHARE | F_PING},
+
+ /* sync - only submit a frame at a time */
+ { "sync", F_SYNC },
+ { "sync-vip", F_SYNC | F_VIP },
+ { "sync-solo", F_SYNC | F_SOLO },
+
+ /* flow - synchronise execution against the clock (vblank) */
+ { "flow", F_PACE | F_FLOW },
+ { "flow-solo", F_PACE | F_FLOW | F_SOLO },
+ { "flow-share", F_PACE | F_FLOW | F_SHARE },
+ { "flow-ping", F_PACE | F_FLOW | F_SHARE | F_PING },
+
+ /* next - submit ahead of the clock (vblank double buffering) */
+ { "next", F_PACE | F_FLOW | F_NEXT },
+ { "next-solo", F_PACE | F_FLOW | F_NEXT | F_SOLO },
+ { "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+ { "next-ping", F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+ /* spare - underutilise by a single client timeslice */
+ { "spare", F_PACE | F_FLOW | F_SPARE },
+ { "spare-solo", F_PACE | F_FLOW | F_SPARE | F_SOLO },
+
+ /* half - run at half pace (submit 16ms of work every 32ms) */
+ { "half", F_PACE | F_FLOW | F_HALF },
+ { "half-solo", F_PACE | F_FLOW | F_HALF | F_SOLO },
+
+ {} /* sentinel: the loop below stops at the NULL name */
+ };
+ /* Requires gen8+; each table entry becomes "fair-<name>" with one dynamic subtest per engine. */
+ igt_fixture {
+ igt_info("CS timestamp frequency: %d\n",
+ read_timestamp_frequency(i915));
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ }
+
+ for (typeof(*fair) *f = fair; f->name; f++) {
+ igt_subtest_with_dynamic_f("fair-%s", f->name) {
+ const struct intel_execution_engine2 *e;
+
+ __for_each_physical_engine(i915, e) {
+ if (!gem_class_can_store_dword(i915, e->class)) /* skip engines failing this check */
+ continue;
+
+ igt_dynamic_f("%s", e->name)
+ fairness(i915, e, timeout, f->flags);
+ }
+ }
+ }
+}
+
static uint32_t read_ctx_timestamp(int i915,
uint32_t ctx,
const struct intel_execution_engine2 *e)
@@ -2789,6 +3582,10 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ igt_subtest_group {
+ test_fairness(fd, 2);
+ }
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.29.2
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev14)
2020-12-16 15:24 ` [igt-dev] " Chris Wilson
(?)
@ 2020-12-16 17:34 ` Patchwork
-1 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2020-12-16 17:34 UTC (permalink / raw)
To: Chris Wilson; +Cc: igt-dev
[-- Attachment #1.1: Type: text/plain, Size: 4177 bytes --]
== Series Details ==
Series: i915/gem_exec_schedule: Try to spot unfairness (rev14)
URL : https://patchwork.freedesktop.org/series/77887/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_9493 -> IGTPW_5299
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/index.html
Known issues
------------
Here are the changes found in IGTPW_5299 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_flink_basic@double-flink:
- fi-tgl-y: [PASS][1] -> [DMESG-WARN][2] ([i915#402]) +2 similar issues
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/fi-tgl-y/igt@gem_flink_basic@double-flink.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/fi-tgl-y/igt@gem_flink_basic@double-flink.html
* igt@i915_selftest@live@gt_pm:
- fi-tgl-y: NOTRUN -> [DMESG-FAIL][3] ([i915#1759])
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/fi-tgl-y/igt@i915_selftest@live@gt_pm.html
#### Possible fixes ####
* igt@gem_exec_suspend@basic-s3:
- fi-tgl-y: [DMESG-WARN][4] ([i915#2411] / [i915#402]) -> [PASS][5]
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/fi-tgl-y/igt@gem_exec_suspend@basic-s3.html
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/fi-tgl-y/igt@gem_exec_suspend@basic-s3.html
* igt@vgem_basic@setversion:
- fi-tgl-y: [DMESG-WARN][6] ([i915#402]) -> [PASS][7] +1 similar issue
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/fi-tgl-y/igt@vgem_basic@setversion.html
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/fi-tgl-y/igt@vgem_basic@setversion.html
#### Warnings ####
* igt@i915_selftest@live@gt_lrc:
- fi-tgl-y: [INCOMPLETE][8] -> [DMESG-FAIL][9] ([i915#2373])
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/fi-tgl-y/igt@i915_selftest@live@gt_lrc.html
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/fi-tgl-y/igt@i915_selftest@live@gt_lrc.html
[i915#1759]: https://gitlab.freedesktop.org/drm/intel/issues/1759
[i915#2373]: https://gitlab.freedesktop.org/drm/intel/issues/2373
[i915#2411]: https://gitlab.freedesktop.org/drm/intel/issues/2411
[i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
Participating hosts (42 -> 38)
------------------------------
Missing (4): fi-dg1-1 fi-bsw-cyan fi-bdw-samus fi-hsw-4200u
Build changes
-------------
* CI: CI-20190529 -> None
* IGT: IGT_5905 -> IGTPW_5299
CI-20190529: 20190529
CI_DRM_9493: 27c8bb9a6204aea5be2a779ca1f36482149de9bf @ git://anongit.freedesktop.org/gfx-ci/linux
IGTPW_5299: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/index.html
IGT_5905: 3d0934900bddeb7a68f1abab4cd05077f0609e32 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
== Testlist changes ==
+igt@gem_exec_schedule@fair-flow
+igt@gem_exec_schedule@fair-flow-ping
+igt@gem_exec_schedule@fair-flow-share
+igt@gem_exec_schedule@fair-flow-solo
+igt@gem_exec_schedule@fair-half
+igt@gem_exec_schedule@fair-half-solo
+igt@gem_exec_schedule@fair-next
+igt@gem_exec_schedule@fair-next-ping
+igt@gem_exec_schedule@fair-next-share
+igt@gem_exec_schedule@fair-next-solo
+igt@gem_exec_schedule@fair-none
+igt@gem_exec_schedule@fair-none-ping
+igt@gem_exec_schedule@fair-none-rrul
+igt@gem_exec_schedule@fair-none-share
+igt@gem_exec_schedule@fair-none-solo
+igt@gem_exec_schedule@fair-none-vip
+igt@gem_exec_schedule@fair-pace
+igt@gem_exec_schedule@fair-pace-ping
+igt@gem_exec_schedule@fair-pace-share
+igt@gem_exec_schedule@fair-pace-solo
+igt@gem_exec_schedule@fair-spare
+igt@gem_exec_schedule@fair-spare-solo
+igt@gem_exec_schedule@fair-sync
+igt@gem_exec_schedule@fair-sync-solo
+igt@gem_exec_schedule@fair-sync-vip
+igt@gem_exec_schedule@fair-throttle
+igt@gem_exec_schedule@fair-throttle-rrul
+igt@gem_exec_schedule@fair-throttle-share
+igt@gem_exec_schedule@fair-throttle-solo
+igt@gem_exec_schedule@fair-throttle-vip
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/index.html
[-- Attachment #1.2: Type: text/html, Size: 5204 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 16+ messages in thread
* [igt-dev] ✗ Fi.CI.IGT: failure for i915/gem_exec_schedule: Try to spot unfairness (rev14)
2020-12-16 15:24 ` [igt-dev] " Chris Wilson
(?)
(?)
@ 2020-12-16 20:37 ` Patchwork
-1 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2020-12-16 20:37 UTC (permalink / raw)
To: Chris Wilson; +Cc: igt-dev
[-- Attachment #1.1: Type: text/plain, Size: 30272 bytes --]
== Series Details ==
Series: i915/gem_exec_schedule: Try to spot unfairness (rev14)
URL : https://patchwork.freedesktop.org/series/77887/
State : failure
== Summary ==
CI Bug Log - changes from CI_DRM_9493_full -> IGTPW_5299_full
====================================================
Summary
-------
**FAILURE**
Serious unknown changes coming with IGTPW_5299_full absolutely need to be
verified manually.
If you think the reported changes have nothing to do with the changes
introduced in IGTPW_5299_full, please notify your bug team to allow them
to document this new failure mode, which will reduce false positives in CI.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/index.html
Possible new issues
-------------------
Here are the unknown changes that may have been introduced in IGTPW_5299_full:
### IGT changes ###
#### Possible regressions ####
* {igt@gem_exec_schedule@fair-next-ping@vecs0} (NEW):
- shard-iclb: NOTRUN -> [SKIP][1] +15 similar issues
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb8/igt@gem_exec_schedule@fair-next-ping@vecs0.html
* {igt@gem_exec_schedule@fair-none-ping@rcs0} (NEW):
- shard-tglb: NOTRUN -> [SKIP][2] +19 similar issues
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb3/igt@gem_exec_schedule@fair-none-ping@rcs0.html
* {igt@gem_exec_schedule@fair-pace-share@vecs0} (NEW):
- shard-glk: NOTRUN -> [FAIL][3] +8 similar issues
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-glk3/igt@gem_exec_schedule@fair-pace-share@vecs0.html
* {igt@gem_exec_schedule@fair-pace-solo@vcs1} (NEW):
- shard-kbl: NOTRUN -> [FAIL][4] +11 similar issues
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-kbl4/igt@gem_exec_schedule@fair-pace-solo@vcs1.html
* {igt@gem_exec_schedule@fair-throttle-solo@vcs1} (NEW):
- shard-tglb: NOTRUN -> [FAIL][5] +55 similar issues
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb1/igt@gem_exec_schedule@fair-throttle-solo@vcs1.html
* {igt@gem_exec_schedule@fair-throttle@vcs0} (NEW):
- shard-iclb: NOTRUN -> [FAIL][6] +28 similar issues
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb4/igt@gem_exec_schedule@fair-throttle@vcs0.html
* igt@kms_flip@flip-vs-panning-interruptible@b-vga1:
- shard-hsw: [PASS][7] -> [INCOMPLETE][8]
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-hsw7/igt@kms_flip@flip-vs-panning-interruptible@b-vga1.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-hsw4/igt@kms_flip@flip-vs-panning-interruptible@b-vga1.html
New tests
---------
New tests have been introduced between CI_DRM_9493_full and IGTPW_5299_full:
### New IGT tests (174) ###
* igt@gem_exec_schedule@fair-flow:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-flow-ping:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-flow-ping@bcs0:
- Statuses : 5 skip(s)
- Exec time: [0.0, 4.22] s
* igt@gem_exec_schedule@fair-flow-ping@rcs0:
- Statuses : 5 skip(s)
- Exec time: [2.13, 4.21] s
* igt@gem_exec_schedule@fair-flow-ping@vcs0:
- Statuses : 5 skip(s)
- Exec time: [2.16, 4.22] s
* igt@gem_exec_schedule@fair-flow-ping@vcs1:
- Statuses : 3 skip(s)
- Exec time: [2.16, 4.23] s
* igt@gem_exec_schedule@fair-flow-ping@vecs0:
- Statuses : 5 skip(s)
- Exec time: [2.14, 4.22] s
* igt@gem_exec_schedule@fair-flow-share:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-flow-share@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.10] s
* igt@gem_exec_schedule@fair-flow-share@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.58, 13.06] s
* igt@gem_exec_schedule@fair-flow-share@vcs0:
- Statuses : 5 pass(s)
- Exec time: [9.03, 13.06] s
* igt@gem_exec_schedule@fair-flow-share@vcs1:
- Statuses : 3 pass(s)
- Exec time: [10.89, 13.07] s
* igt@gem_exec_schedule@fair-flow-share@vecs0:
- Statuses : 5 pass(s)
- Exec time: [9.03, 13.10] s
* igt@gem_exec_schedule@fair-flow-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-flow@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.45] s
* igt@gem_exec_schedule@fair-flow@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.98, 13.34] s
* igt@gem_exec_schedule@fair-flow@vcs0:
- Statuses : 5 pass(s)
- Exec time: [9.00, 13.46] s
* igt@gem_exec_schedule@fair-flow@vcs1:
- Statuses : 2 pass(s)
- Exec time: [11.13, 13.12] s
* igt@gem_exec_schedule@fair-flow@vecs0:
- Statuses : 5 pass(s)
- Exec time: [8.82, 13.49] s
* igt@gem_exec_schedule@fair-half:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-half-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-half-solo@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.51] s
* igt@gem_exec_schedule@fair-half-solo@rcs0:
- Statuses : 5 pass(s)
- Exec time: [11.50, 15.31] s
* igt@gem_exec_schedule@fair-half-solo@vcs0:
- Statuses : 5 pass(s)
- Exec time: [11.51, 15.47] s
* igt@gem_exec_schedule@fair-half-solo@vcs1:
- Statuses : 2 pass(s)
- Exec time: [11.49, 13.38] s
* igt@gem_exec_schedule@fair-half-solo@vecs0:
- Statuses : 5 pass(s)
- Exec time: [11.59, 15.42] s
* igt@gem_exec_schedule@fair-half@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.44] s
* igt@gem_exec_schedule@fair-half@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.96, 13.58] s
* igt@gem_exec_schedule@fair-half@vcs0:
- Statuses : 5 pass(s)
- Exec time: [9.04, 13.40] s
* igt@gem_exec_schedule@fair-half@vcs1:
- Statuses : 2 pass(s)
- Exec time: [9.03, 13.37] s
* igt@gem_exec_schedule@fair-half@vecs0:
- Statuses : 5 pass(s)
- Exec time: [9.01, 13.73] s
* igt@gem_exec_schedule@fair-next:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-next-ping:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-next-ping@bcs0:
- Statuses : 5 skip(s)
- Exec time: [0.0, 4.21] s
* igt@gem_exec_schedule@fair-next-ping@rcs0:
- Statuses : 5 skip(s)
- Exec time: [2.14, 4.25] s
* igt@gem_exec_schedule@fair-next-ping@vcs0:
- Statuses : 5 skip(s)
- Exec time: [2.11, 4.21] s
* igt@gem_exec_schedule@fair-next-ping@vcs1:
- Statuses : 2 skip(s)
- Exec time: [2.14, 4.22] s
* igt@gem_exec_schedule@fair-next-ping@vecs0:
- Statuses : 5 skip(s)
- Exec time: [2.15, 4.21] s
* igt@gem_exec_schedule@fair-next-share:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-next-share@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.04] s
* igt@gem_exec_schedule@fair-next-share@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.98, 13.04] s
* igt@gem_exec_schedule@fair-next-share@vcs0:
- Statuses : 5 pass(s)
- Exec time: [9.01, 13.04] s
* igt@gem_exec_schedule@fair-next-share@vcs1:
- Statuses : 2 pass(s)
- Exec time: [11.13, 13.05] s
* igt@gem_exec_schedule@fair-next-share@vecs0:
- Statuses : 5 pass(s)
- Exec time: [9.01, 13.05] s
* igt@gem_exec_schedule@fair-next-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-next-solo@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 10.86] s
* igt@gem_exec_schedule@fair-next-solo@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.94, 11.53] s
* igt@gem_exec_schedule@fair-next-solo@vcs0:
- Statuses : 5 pass(s)
- Exec time: [8.92, 13.89] s
* igt@gem_exec_schedule@fair-next-solo@vcs1:
- Statuses : 3 pass(s)
- Exec time: [10.66, 11.28] s
* igt@gem_exec_schedule@fair-next-solo@vecs0:
- Statuses : 5 pass(s)
- Exec time: [8.92, 13.82] s
* igt@gem_exec_schedule@fair-next@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.06] s
* igt@gem_exec_schedule@fair-next@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.64, 13.06] s
* igt@gem_exec_schedule@fair-next@vcs0:
- Statuses : 5 pass(s)
- Exec time: [8.63, 13.07] s
* igt@gem_exec_schedule@fair-next@vcs1:
- Statuses : 2 pass(s)
- Exec time: [11.12, 13.06] s
* igt@gem_exec_schedule@fair-next@vecs0:
- Statuses : 5 pass(s)
- Exec time: [8.63, 13.06] s
* igt@gem_exec_schedule@fair-none:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-none-ping:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-none-ping@bcs0:
- Statuses : 5 skip(s)
- Exec time: [0.0, 5.05] s
* igt@gem_exec_schedule@fair-none-ping@rcs0:
- Statuses : 5 skip(s)
- Exec time: [2.39, 5.04] s
* igt@gem_exec_schedule@fair-none-ping@vcs0:
- Statuses : 5 skip(s)
- Exec time: [2.56, 5.52] s
* igt@gem_exec_schedule@fair-none-ping@vcs1:
- Statuses : 2 skip(s)
- Exec time: [2.67, 4.81] s
* igt@gem_exec_schedule@fair-none-ping@vecs0:
- Statuses : 5 skip(s)
- Exec time: [2.53, 5.38] s
* igt@gem_exec_schedule@fair-none-rrul:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-none-rrul@bcs0:
- Statuses : 1 fail(s) 1 pass(s) 3 skip(s)
- Exec time: [0.0, 11.92] s
* igt@gem_exec_schedule@fair-none-rrul@rcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [9.46, 11.75] s
* igt@gem_exec_schedule@fair-none-rrul@vcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [6.91, 12.39] s
* igt@gem_exec_schedule@fair-none-rrul@vcs1:
- Statuses : 1 fail(s) 1 pass(s)
- Exec time: [6.63, 12.45] s
* igt@gem_exec_schedule@fair-none-rrul@vecs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [9.09, 12.03] s
* igt@gem_exec_schedule@fair-none-share:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-none-share@bcs0:
- Statuses : 1 fail(s) 1 pass(s) 3 skip(s)
- Exec time: [0.0, 9.73] s
* igt@gem_exec_schedule@fair-none-share@rcs0:
- Statuses : 4 pass(s) 1 skip(s)
- Exec time: [7.09, 12.17] s
* igt@gem_exec_schedule@fair-none-share@vcs0:
- Statuses : 4 pass(s) 1 skip(s)
- Exec time: [7.49, 11.95] s
* igt@gem_exec_schedule@fair-none-share@vcs1:
- Statuses : 1 fail(s) 2 pass(s)
- Exec time: [9.51, 10.05] s
* igt@gem_exec_schedule@fair-none-share@vecs0:
- Statuses : 1 fail(s) 3 pass(s) 1 skip(s)
- Exec time: [7.54, 10.44] s
* igt@gem_exec_schedule@fair-none-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-none-solo@bcs0:
- Statuses : 2 fail(s) 3 skip(s)
- Exec time: [0.0, 14.20] s
* igt@gem_exec_schedule@fair-none-solo@rcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [10.56, 14.11] s
* igt@gem_exec_schedule@fair-none-solo@vcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [11.95, 18.59] s
* igt@gem_exec_schedule@fair-none-solo@vcs1:
- Statuses : 1 fail(s) 1 pass(s)
- Exec time: [13.36, 18.73] s
* igt@gem_exec_schedule@fair-none-solo@vecs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [12.03, 17.14] s
* igt@gem_exec_schedule@fair-none-vip:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-none-vip@bcs0:
- Statuses : 1 fail(s) 1 pass(s) 3 skip(s)
- Exec time: [0.0, 12.98] s
* igt@gem_exec_schedule@fair-none-vip@rcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [9.77, 12.67] s
* igt@gem_exec_schedule@fair-none-vip@vcs0:
- Statuses : 1 fail(s) 3 pass(s) 1 skip(s)
- Exec time: [7.91, 13.67] s
* igt@gem_exec_schedule@fair-none-vip@vcs1:
- Statuses : 1 fail(s) 1 pass(s)
- Exec time: [9.69, 10.66] s
* igt@gem_exec_schedule@fair-none-vip@vecs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [9.68, 13.02] s
* igt@gem_exec_schedule@fair-none@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 12.07] s
* igt@gem_exec_schedule@fair-none@rcs0:
- Statuses : 5 pass(s)
- Exec time: [9.45, 12.22] s
* igt@gem_exec_schedule@fair-none@vcs0:
- Statuses : 1 fail(s) 3 pass(s) 1 skip(s)
- Exec time: [7.59, 11.02] s
* igt@gem_exec_schedule@fair-none@vcs1:
- Statuses : 2 pass(s)
- Exec time: [10.22, 11.92] s
* igt@gem_exec_schedule@fair-none@vecs0:
- Statuses : 1 fail(s) 3 pass(s) 1 skip(s)
- Exec time: [7.63, 10.91] s
* igt@gem_exec_schedule@fair-pace:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-pace-ping:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-pace-ping@bcs0:
- Statuses : 5 skip(s)
- Exec time: [0.0, 4.18] s
* igt@gem_exec_schedule@fair-pace-ping@rcs0:
- Statuses : 5 skip(s)
- Exec time: [2.09, 4.44] s
* igt@gem_exec_schedule@fair-pace-ping@vcs0:
- Statuses : 5 skip(s)
- Exec time: [2.11, 4.39] s
* igt@gem_exec_schedule@fair-pace-ping@vcs1:
- Statuses : 2 skip(s)
- Exec time: [2.14, 4.18] s
* igt@gem_exec_schedule@fair-pace-ping@vecs0:
- Statuses : 5 skip(s)
- Exec time: [2.12, 4.39] s
* igt@gem_exec_schedule@fair-pace-share:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-pace-share@bcs0:
- Statuses : 1 fail(s) 1 pass(s) 3 skip(s)
- Exec time: [0.0, 10.61] s
* igt@gem_exec_schedule@fair-pace-share@rcs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.50, 11.86] s
* igt@gem_exec_schedule@fair-pace-share@vcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [8.39, 11.74] s
* igt@gem_exec_schedule@fair-pace-share@vcs1:
- Statuses : 2 fail(s)
- Exec time: [8.59, 10.58] s
* igt@gem_exec_schedule@fair-pace-share@vecs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.56, 10.63] s
* igt@gem_exec_schedule@fair-pace-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-pace-solo@bcs0:
- Statuses : 1 fail(s) 1 pass(s) 3 skip(s)
- Exec time: [0.0, 10.59] s
* igt@gem_exec_schedule@fair-pace-solo@rcs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.58, 10.61] s
* igt@gem_exec_schedule@fair-pace-solo@vcs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.61, 10.62] s
* igt@gem_exec_schedule@fair-pace-solo@vcs1:
- Statuses : 2 fail(s) 1 pass(s)
- Exec time: [8.59, 10.60] s
* igt@gem_exec_schedule@fair-pace-solo@vecs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.61, 10.62] s
* igt@gem_exec_schedule@fair-pace@bcs0:
- Statuses : 2 fail(s) 3 skip(s)
- Exec time: [0.0, 10.62] s
* igt@gem_exec_schedule@fair-pace@rcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [8.50, 12.24] s
* igt@gem_exec_schedule@fair-pace@vcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [8.39, 10.62] s
* igt@gem_exec_schedule@fair-pace@vcs1:
- Statuses : 1 fail(s) 1 pass(s)
- Exec time: [10.63, 10.91] s
* igt@gem_exec_schedule@fair-pace@vecs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [8.42, 10.91] s
* igt@gem_exec_schedule@fair-spare:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-spare-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-spare-solo@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 10.83] s
* igt@gem_exec_schedule@fair-spare-solo@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.92, 11.70] s
* igt@gem_exec_schedule@fair-spare-solo@vcs0:
- Statuses : 5 pass(s)
- Exec time: [8.89, 13.92] s
* igt@gem_exec_schedule@fair-spare-solo@vcs1:
- Statuses : 3 pass(s)
- Exec time: [10.74, 13.92] s
* igt@gem_exec_schedule@fair-spare-solo@vecs0:
- Statuses : 5 pass(s)
- Exec time: [8.90, 11.75] s
* igt@gem_exec_schedule@fair-spare@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 13.37] s
* igt@gem_exec_schedule@fair-spare@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.81, 13.32] s
* igt@gem_exec_schedule@fair-spare@vcs0:
- Statuses : 5 pass(s)
- Exec time: [9.01, 13.13] s
* igt@gem_exec_schedule@fair-spare@vcs1:
- Statuses : 2 pass(s)
- Exec time: [11.21, 13.11] s
* igt@gem_exec_schedule@fair-spare@vecs0:
- Statuses : 5 pass(s)
- Exec time: [9.05, 13.43] s
* igt@gem_exec_schedule@fair-sync:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-sync-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-sync-vip:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-sync-vip@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 12.97] s
* igt@gem_exec_schedule@fair-sync-vip@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.73, 12.97] s
* igt@gem_exec_schedule@fair-sync-vip@vcs0:
- Statuses : 5 pass(s)
- Exec time: [8.80, 12.96] s
* igt@gem_exec_schedule@fair-sync-vip@vcs1:
- Statuses : 2 pass(s)
- Exec time: [11.00, 12.98] s
* igt@gem_exec_schedule@fair-sync-vip@vecs0:
- Statuses : 5 pass(s)
- Exec time: [8.64, 12.98] s
* igt@gem_exec_schedule@fair-sync@bcs0:
- Statuses : 2 pass(s) 3 skip(s)
- Exec time: [0.0, 12.91] s
* igt@gem_exec_schedule@fair-sync@rcs0:
- Statuses : 5 pass(s)
- Exec time: [8.71, 12.88] s
* igt@gem_exec_schedule@fair-sync@vcs0:
- Statuses : 5 pass(s)
- Exec time: [8.66, 12.90] s
* igt@gem_exec_schedule@fair-sync@vcs1:
- Statuses : 2 pass(s)
- Exec time: [10.80, 12.89] s
* igt@gem_exec_schedule@fair-sync@vecs0:
- Statuses : 5 pass(s)
- Exec time: [8.65, 12.88] s
* igt@gem_exec_schedule@fair-throttle:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-throttle-rrul:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-throttle-rrul@bcs0:
- Statuses : 2 fail(s) 3 skip(s)
- Exec time: [0.0, 8.79] s
* igt@gem_exec_schedule@fair-throttle-rrul@rcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [8.70, 12.41] s
* igt@gem_exec_schedule@fair-throttle-rrul@vcs0:
- Statuses : 4 fail(s) 1 pass(s)
- Exec time: [8.78, 9.21] s
* igt@gem_exec_schedule@fair-throttle-rrul@vcs1:
- Statuses : 3 fail(s)
- Exec time: [8.73, 8.89] s
* igt@gem_exec_schedule@fair-throttle-rrul@vecs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.71, 9.12] s
* igt@gem_exec_schedule@fair-throttle-share:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-throttle-share@bcs0:
- Statuses : 1 fail(s) 1 pass(s) 3 skip(s)
- Exec time: [0.0, 11.43] s
* igt@gem_exec_schedule@fair-throttle-share@rcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [9.03, 12.79] s
* igt@gem_exec_schedule@fair-throttle-share@vcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [9.03, 11.38] s
* igt@gem_exec_schedule@fair-throttle-share@vcs1:
- Statuses : 1 fail(s) 1 pass(s)
- Exec time: [9.13, 11.10] s
* igt@gem_exec_schedule@fair-throttle-share@vecs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [8.76, 11.51] s
* igt@gem_exec_schedule@fair-throttle-solo:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-throttle-solo@bcs0:
- Statuses : 2 fail(s) 3 skip(s)
- Exec time: [0.0, 13.21] s
* igt@gem_exec_schedule@fair-throttle-solo@rcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [9.30, 12.61] s
* igt@gem_exec_schedule@fair-throttle-solo@vcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [11.97, 13.08] s
* igt@gem_exec_schedule@fair-throttle-solo@vcs1:
- Statuses : 2 fail(s) 1 pass(s)
- Exec time: [11.83, 12.84] s
* igt@gem_exec_schedule@fair-throttle-solo@vecs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [11.95, 13.17] s
* igt@gem_exec_schedule@fair-throttle-vip:
- Statuses : 2 skip(s)
- Exec time: [0.0] s
* igt@gem_exec_schedule@fair-throttle-vip@bcs0:
- Statuses : 2 fail(s) 3 skip(s)
- Exec time: [0.0, 9.77] s
* igt@gem_exec_schedule@fair-throttle-vip@rcs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [9.07, 11.71] s
* igt@gem_exec_schedule@fair-throttle-vip@vcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [9.21, 12.89] s
* igt@gem_exec_schedule@fair-throttle-vip@vcs1:
- Statuses : 3 fail(s)
- Exec time: [9.25, 9.82] s
* igt@gem_exec_schedule@fair-throttle-vip@vecs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [9.22, 12.01] s
* igt@gem_exec_schedule@fair-throttle@bcs0:
- Statuses : 2 fail(s) 3 skip(s)
- Exec time: [0.0, 11.44] s
* igt@gem_exec_schedule@fair-throttle@rcs0:
- Statuses : 1 fail(s) 4 pass(s)
- Exec time: [8.88, 12.97] s
* igt@gem_exec_schedule@fair-throttle@vcs0:
- Statuses : 3 fail(s) 2 pass(s)
- Exec time: [8.95, 11.52] s
* igt@gem_exec_schedule@fair-throttle@vcs1:
- Statuses : 2 fail(s) 1 pass(s)
- Exec time: [8.96, 11.75] s
* igt@gem_exec_schedule@fair-throttle@vecs0:
- Statuses : 2 fail(s) 3 pass(s)
- Exec time: [9.08, 11.77] s
* igt@kms_atomic_transition@modeset-transition:
- Statuses :
- Exec time: [None] s
* igt@kms_atomic_transition@modeset-transition-fencing:
- Statuses :
- Exec time: [None] s
* igt@kms_atomic_transition@modeset-transition-nonblocking:
- Statuses :
- Exec time: [None] s
* igt@kms_atomic_transition@modeset-transition-nonblocking-fencing:
- Statuses :
- Exec time: [None] s
Known issues
------------
Here are the changes found in IGTPW_5299_full that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@gem_exec_params@secure-non-master:
- shard-tglb: NOTRUN -> [SKIP][9] ([fdo#112283])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb5/igt@gem_exec_params@secure-non-master.html
- shard-iclb: NOTRUN -> [SKIP][10] ([fdo#112283])
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb2/igt@gem_exec_params@secure-non-master.html
* {igt@gem_exec_schedule@fair-flow-ping@rcs0} (NEW):
- shard-kbl: NOTRUN -> [SKIP][11] ([fdo#109271]) +90 similar issues
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-kbl3/igt@gem_exec_schedule@fair-flow-ping@rcs0.html
* {igt@gem_exec_schedule@fair-flow-ping@vcs1} (NEW):
- shard-iclb: NOTRUN -> [SKIP][12] ([fdo#109276])
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb4/igt@gem_exec_schedule@fair-flow-ping@vcs1.html
* {igt@gem_exec_schedule@fair-next-ping@vecs0} (NEW):
- shard-glk: NOTRUN -> [SKIP][13] ([fdo#109271]) +49 similar issues
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-glk4/igt@gem_exec_schedule@fair-next-ping@vecs0.html
* {igt@gem_exec_schedule@fair-none-vip} (NEW):
- shard-snb: NOTRUN -> [SKIP][14] ([fdo#109271]) +29 similar issues
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-snb6/igt@gem_exec_schedule@fair-none-vip.html
* {igt@gem_exec_schedule@fair-pace-share@bcs0} (NEW):
- shard-apl: NOTRUN -> [SKIP][15] ([fdo#109271]) +60 similar issues
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-apl4/igt@gem_exec_schedule@fair-pace-share@bcs0.html
* {igt@gem_exec_schedule@fair-pace-solo} (NEW):
- shard-hsw: NOTRUN -> [SKIP][16] ([fdo#109271]) +44 similar issues
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-hsw8/igt@gem_exec_schedule@fair-pace-solo.html
* igt@gem_huc_copy@huc-copy:
- shard-tglb: [PASS][17] -> [SKIP][18] ([i915#2190])
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-tglb8/igt@gem_huc_copy@huc-copy.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb6/igt@gem_huc_copy@huc-copy.html
* igt@gem_userptr_blits@process-exit-mmap-busy@uc:
- shard-kbl: NOTRUN -> [SKIP][19] ([fdo#109271] / [i915#1699]) +3 similar issues
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-kbl2/igt@gem_userptr_blits@process-exit-mmap-busy@uc.html
* igt@i915_pm_dc@dc6-psr:
- shard-iclb: [PASS][20] -> [FAIL][21] ([i915#454])
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-iclb6/igt@i915_pm_dc@dc6-psr.html
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb4/igt@i915_pm_dc@dc6-psr.html
* igt@i915_pm_rpm@reg-read-ioctl:
- shard-iclb: [PASS][22] -> [SKIP][23] ([i915#579])
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-iclb3/igt@i915_pm_rpm@reg-read-ioctl.html
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb8/igt@i915_pm_rpm@reg-read-ioctl.html
- shard-hsw: [PASS][24] -> [SKIP][25] ([fdo#109271])
[24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-hsw1/igt@i915_pm_rpm@reg-read-ioctl.html
[25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-hsw2/igt@i915_pm_rpm@reg-read-ioctl.html
- shard-tglb: [PASS][26] -> [SKIP][27] ([i915#579])
[26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-tglb2/igt@i915_pm_rpm@reg-read-ioctl.html
[27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb6/igt@i915_pm_rpm@reg-read-ioctl.html
- shard-kbl: [PASS][28] -> [SKIP][29] ([fdo#109271])
[28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-kbl6/igt@i915_pm_rpm@reg-read-ioctl.html
[29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-kbl7/igt@i915_pm_rpm@reg-read-ioctl.html
- shard-apl: [PASS][30] -> [SKIP][31] ([fdo#109271])
[30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-apl8/igt@i915_pm_rpm@reg-read-ioctl.html
[31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-apl6/igt@i915_pm_rpm@reg-read-ioctl.html
- shard-glk: [PASS][32] -> [SKIP][33] ([fdo#109271])
[32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-glk2/igt@i915_pm_rpm@reg-read-ioctl.html
[33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-glk3/igt@i915_pm_rpm@reg-read-ioctl.html
* igt@kms_big_fb@yf-tiled-64bpp-rotate-270:
- shard-tglb: NOTRUN -> [SKIP][34] ([fdo#111615]) +1 similar issue
[34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb8/igt@kms_big_fb@yf-tiled-64bpp-rotate-270.html
- shard-iclb: NOTRUN -> [SKIP][35] ([fdo#110723])
[35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb2/igt@kms_big_fb@yf-tiled-64bpp-rotate-270.html
* igt@kms_chamelium@hdmi-aspect-ratio:
- shard-hsw: NOTRUN -> [SKIP][36] ([fdo#109271] / [fdo#111827])
[36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-hsw8/igt@kms_chamelium@hdmi-aspect-ratio.html
* igt@kms_color_chamelium@pipe-d-ctm-0-25:
- shard-kbl: NOTRUN -> [SKIP][37] ([fdo#109271] / [fdo#111827]) +2 similar issues
[37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-kbl3/igt@kms_color_chamelium@pipe-d-ctm-0-25.html
* igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy:
- shard-glk: [PASS][38] -> [FAIL][39] ([i915#72])
[38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-glk2/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy.html
[39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-legacy.html
* igt@kms_cursor_legacy@cursor-vs-flip-toggle:
- shard-hsw: [PASS][40] -> [FAIL][41] ([i915#2370])
[40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-hsw4/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html
[41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-hsw1/igt@kms_cursor_legacy@cursor-vs-flip-toggle.html
* igt@kms_draw_crc@draw-method-rgb565-blt-xtiled:
- shard-snb: [PASS][42] -> [SKIP][43] ([fdo#109271]) +2 similar issues
[42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-snb2/igt@kms_draw_crc@draw-method-rgb565-blt-xtiled.html
[43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-snb2/igt@kms_draw_crc@draw-method-rgb565-blt-xtiled.html
* igt@kms_flip@2x-flip-vs-modeset-vs-hang:
- shard-iclb: NOTRUN -> [SKIP][44] ([fdo#109274]) +2 similar issues
[44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-iclb7/igt@kms_flip@2x-flip-vs-modeset-vs-hang.html
* igt@kms_flip@flip-vs-blocking-wf-vblank@a-edp1:
- shard-tglb: [PASS][45] -> [FAIL][46] ([i915#2122])
[45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-tglb5/igt@kms_flip@flip-vs-blocking-wf-vblank@a-edp1.html
[46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb6/igt@kms_flip@flip-vs-blocking-wf-vblank@a-edp1.html
* igt@kms_flip@flip-vs-expired-vblank@a-edp1:
- shard-tglb: [PASS][47] -> [FAIL][48] ([i915#2598])
[47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-tglb8/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html
[48]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-tglb7/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html
* igt@kms_flip@flip-vs-expired-vblank@c-dp1:
- shard-apl: [PASS][49] -> [FAIL][50] ([i915#79])
[49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9493/shard-apl4/igt@kms_flip@flip-vs-expired-vblank@c-dp1.html
[50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/shard-apl8/igt@kms_flip@flip-vs-expired-vblank@c-dp1.html
* igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs:
- shard-kbl: NOTRUN -> [SKIP][51] ([fdo#109271] / [i915#2672])
[51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_5299/index.html
[-- Attachment #1.2: Type: text/html, Size: 37032 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev
^ permalink raw reply [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-12-10 2:09 Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-12-10 2:09 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
v2: igt_assert_f and more commentary; exclude vip from client stats,
include range of frame intervals from each individual client
v3: Write down what the test actually does!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 954 +++++++++++++++++++++++++++++++++
1 file changed, 954 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index f23d63ac3..67cf88e72 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2516,6 +2517,926 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+/*
+ * Query I915_PARAM_CS_TIMESTAMP_FREQUENCY on @i915 and return the value.
+ *
+ * The ioctl result is not inspected; if the kernel does not fill in the
+ * value, the initial 0 is handed back to the caller.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int hz = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &hz,
+	};
+
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return hz;
+}
+
+/*
+ * Divide @x by @y, rounding the quotient up to the next integer.
+ *
+ * Computed as quotient plus a remainder test rather than (x + y - 1) / y,
+ * so that no intermediate sum can wrap around for large @x.
+ * @y must be non-zero.
+ */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return x / y + (x % y != 0);
+}
+
+/*
+ * Convert a duration of @ns nanoseconds into timestamp ticks, rounding up.
+ *
+ * The tick rate is taken from read_timestamp_frequency(), except on gen11
+ * (icl) where a fixed 12500000 is substituted (CTX vs CS timestamp).
+ */
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	int hz = read_timestamp_frequency(i915);
+
+	if (intel_gen(intel_get_drm_devid(i915)) == 11)
+		hz = 12500000;
+
+	return div64_u64_round_up(ns * hz, NSEC_PER_SEC);
+}
+
+/*
+ * Convert @ticks, counted at the rate reported by
+ * read_timestamp_frequency(), into nanoseconds, rounding up.
+ */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	const uint64_t scaled = ticks * NSEC_PER_SEC;
+
+	return div64_u64_round_up(scaled, read_timestamp_frequency(i915));
+}
+
+/* Build an MI command header: @opcode shifted to bit 23, OR'd with @flags. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH header; @x is the number of ALU instruction dwords that follow. */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+/* One ALU instruction dword: opcode at bit 20, operand 1 at bit 10, operand 2 at bit 0. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand; the unused second operand field is
+ * passed explicitly as 0 so that the three-argument MI_MATH_INSTR expands.
+ */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{ /*
+ * Write a self-looping batch into the first 4096 bytes of @handle.
+ *
+ * The batch samples the register at mmio base + 0x3a8 (RUNTIME) once,
+ * then keeps re-sampling it and only ends once the difference exceeds
+ * @ns as converted by ns_to_ctx_ticks(). @addr is the address the
+ * batch uses to refer to itself: for the jump back to the top of the
+ * loop and for its scratch dwords at byte offsets 4000 and 4064.
+ */
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS }; /* CS_GPR indices: first sample, latest sample */
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base); /* an mmio base for @e is needed to address its registers */
+
+ /* Loop until CTX_TIMESTAMP - initial > @ns */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4; /* zero the upper dword of START_TS */
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* lower dword of START_TS = RUNTIME */
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ while (offset_in_page(cs) & 63) /* zero-fill up to the next 64-byte boundary */
+ *cs++ = 0;
+ jmp = cs; /* top of the loop; target of the closing MI_BATCH_BUFFER_START */
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4; /* zero the upper dword of NOW_TS */
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* lower dword of NOW_TS = RUNTIME */
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta [time elapsed] > ns */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ctx_ticks(i915, ns); /* inverted, like the delta stored by STOREINV */
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a delay batch for context @ctx on engine @e that spins for
+ * @target_ns.
+ *
+ * The object from batch_create() is executed once with a requested
+ * offset of handle << 12 and waited upon, then overwritten by delay()
+ * using obj.offset as the batch's own address. The returned exec object
+ * has EXEC_OBJECT_PINNED set in addition to
+ * EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/* Widen before shifting so the offset is computed in 64 bits. */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{ /*
+ * Write a timestamp-logging batch into @handle, starting at dword 512
+ * (byte offset 2048) of its first page.
+ *
+ * Each execution stores the register at mmio base + 0x358
+ * (CS_TIMESTAMP) to the current journal address, then advances that
+ * address by 4 with bit 11 masked off, and patches the new address
+ * back into its own commands so that the next execution writes to
+ * the next slot. The journal address starts out as @addr.
+ */
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { INC, MASK, ADDR }; /* CS_GPR indices used as ALU variables */
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base); /* an mmio base for @e is needed to address its registers */
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512; /* commands go into the second half of the mapped page */
+
+ /* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs; /* this dword is rewritten by the batch itself, see below */
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* Load the address + inc & mask variables */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs; /* likewise rewritten by the batch on every run */
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC);
+ *cs++ = 4; /* step of one dword per journal entry */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff; /* clears bit 11 of the incremented address */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* Increment the [ring] address for saving CS_TIMESTAMP */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Rewrite the batch buffer for the next execution */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a timestamp-log batch for context @ctx on engine @e.
+ *
+ * The object from batch_create() is executed once with a requested
+ * offset of handle << 12 and waited upon, then filled in by tslog()
+ * using obj.offset as its own address. The returned exec object has
+ * EXEC_OBJECT_PINNED set in addition to EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/* Widen before shifting so the offset is computed in 64 bits. */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+/* qsort() comparator ordering uint32_t values ascending; yields -1, 0 or 1. */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t lhs = *(const uint32_t *)A;
+	const uint32_t rhs = *(const uint32_t *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Report whether the context timestamp of engine @e is treated as usable.
+ * Video-class engines on gen8 are rejected ("looks fubar"); every other
+ * combination is accepted.
+ */
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const bool is_gen8 = intel_gen(intel_get_drm_devid(i915)) == 8;
+
+	return !(is_gen8 && e->class == I915_ENGINE_CLASS_VIDEO);
+}
+
+/*
+ * Pick, at random, a physical engine whose flags differ from @not and
+ * whose class has mutable submission. A copy of @not is returned when no
+ * such engine exists.
+ *
+ * random() is used because the caller seeds with srandom(): POSIX pairs
+ * srandom() with random(), whereas rand() is seeded through srand().
+ */
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	/* First pass: count the eligible engines. */
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	/* Second pass: skip that many eligible engines, then stop. */
+	count = random() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeline,
+ uint32_t common,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *median,
+ unsigned long *iqr)
+#define F_SYNC (1 << 0) /* wait for the current frame's fence after submitting */
+#define F_PACE (1 << 1) /* wait for the previous frame's fence after submitting */
+#define F_FLOW (1 << 2) /* gate each frame on a sw_sync fence from @timeline */
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4) /* one submission per frame instead of three */
+#define F_SPARE (1 << 5)
+#define F_NEXT (1 << 6) /* with F_FLOW: wait on the next timeline point */
+#define F_VIP (1 << 7)
+#define F_RRUL (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING (1 << 10) /* log the timestamp on a different, randomly picked engine */
+#define F_THROTTLE (1 << 11) /* issue DRM_IOCTL_I915_GEM_THROTTLE every frame */
+#define F_ISOLATE (1 << 12)
+{ /*
+ * Body of one client. Until *@ctl becomes non-zero, each iteration
+ * submits the three-object execbuf batches_per_frame times on @e,
+ * then submits obj[0] alone (batch_start_offset 2048, the timestamp
+ * log) on the ping engine with the first submission's out-fence as
+ * its in-fence.
+ *
+ * If @median is non-NULL, the logged timestamps are turned into
+ * frame-to-frame intervals, sorted, and reported through @median and
+ * @iqr after conversion by ticks_to_ns().
+ */
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {},
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ int n;
+
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 3,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32; /* out-fence of this frame's first submission */
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+ close(execbuf.rsvd2);
+
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 2048;
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ if (flags & F_THROTTLE)
+ igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+ igt_swap(obj[2], obj[3]); /* alternate between the two delay batches */
+ igt_swap(p_fence, n_fence); /* this frame's fence becomes the previous one */
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+
+ gem_sync(i915, obj[0].handle);
+ if (median) {
+ uint32_t *map;
+
+ /*
+ * We recorded the CS_TIMESTAMP of each frame, and if
+ * the GPU is being shared completely fairly, we expect
+ * each frame to be at the same interval from the last.
+ *
+ * Compute the interval between frames and report back
+ * both the median interval and the range for this client.
+ */
+
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_WRITE);
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ qsort(map, --n, sizeof(*map), cmp_u32); /* n is now the number of intervals */
+ *iqr = ticks_to_ns(i915, map[(3 * n + 3) / 4] - map[n / 4]);
+ *median = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+/* qsort() comparator ordering unsigned long values ascending; yields -1, 0 or 1. */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Difference @a - @b of two timevals, accumulated in nanoseconds. */
+static uint64_t timeval_delta_ns(const struct timeval *a,
+				 const struct timeval *b)
+{
+	uint64_t ns = 0;
+
+	ns += (a->tv_sec - b->tv_sec) * NSEC_PER_SEC;
+	ns += (a->tv_usec - b->tv_usec) * 1000;
+
+	return ns;
+}
+
+/*
+ * CPU time, in nanoseconds, consumed between the rusage samples @b
+ * (earlier) and @a (later): user time plus system time.
+ */
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	const uint64_t user_ns = timeval_delta_ns(&a->ru_utime, &b->ru_utime);
+	const uint64_t sys_ns = timeval_delta_ns(&a->ru_stime, &b->ru_stime);
+
+	return user_ns + sys_ns;
+}
+
+/*
+ * Sleep for @delay_ns nanoseconds and then advance the sw_sync @timeline by
+ * one step.
+ */
+static void timeline_advance(int timeline, int delay_ns)
+{
+	const int ns_per_sec = 1000 * 1000 * 1000;
+	/*
+	 * nanosleep() rejects tv_nsec >= 1s with EINVAL (and does not sleep),
+	 * so carry whole seconds over into tv_sec.
+	 */
+	struct timespec tv = {
+		.tv_sec = delay_ns / ns_per_sec,
+		.tv_nsec = delay_ns % ns_per_sec,
+	};
+
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+/*
+ * fairness - check that equally loaded clients get an equal share of the GPU
+ * @i915: open drm fd
+ * @e: engine on which the competing clients run
+ * @timeout: length of each measurement pass, in seconds
+ * @flags: F_* flags selecting how the clients pace their frames
+ *
+ * Runs passes with 1, 3, 7, ... (up to 255) forked clients. Each client
+ * reports the median and interquartile range of its frame intervals, which
+ * are then compared against the target interval and against each other.
+ */
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result, *iqr;
+	uint32_t common = 0;
+
+	igt_require(has_ctx_timestamp(i915, e));
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	/*
+	 * Pages shared with the forked clients: result[child] and iqr[child]
+	 * receive each client's report, result[nchild] is the stop flag.
+	 * The parent reads them back, so map them readable as well.
+	 */
+	result = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(result != MAP_FAILED);
+	iqr = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+		   MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(iqr != MAP_FAILED);
+
+	/*
+	 * The combined workload always runs at a 60fps target (unless F_HALF!).
+	 * This gives a frame interval of 16ms that is evenly split across
+	 * all the clients, so simulating a system with a bunch of clients that
+	 * are perfectly balanced and can sustain 60fps. Our job is to ensure
+	 * that each client does run at a smooth 60fps.
+	 *
+	 * Each client runs a fixed length delay loop (as a single request,
+	 * or split into 3) and then records the CS_TIMESTAMP after completing
+	 * its delay. Given a fair allotment of GPU time to each client,
+	 * that timestamp will [ideally] be at precise 16ms intervals.
+	 * In practice, time is wasted on context switches, so as the number
+	 * of clients increases, the proportion of time spent on context
+	 * switches grows. As we get to 64 render clients, we will be spending
+	 * as much time in context switches as executing the client workloads.
+	 *
+	 * Each client frame may be paced by some throttling technique found
+	 * in the wild. i.e. each client may wait until a simulated vblank
+	 * to indicate the start of a new frame, or it may wait until the
+	 * completion of a previous frame. This causes submission from each
+	 * client and across the system to be chunky and uneven.
+	 *
+	 * We look at the variation of frame intervals within each client, and
+	 * the variation of the medians across the clients to see if the
+	 * distribution (budget) of GPU time was fair enough.
+	 *
+	 * Alternative (and important) metrics will be more latency centric;
+	 * looking at how well we can sustain meeting deadline given competition
+	 * by clients for the GPU.
+	 */
+
+	for (int n = 2; n <= 256; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		struct timespec tv;
+		struct igt_mean m;
+		int lo, hi;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+
+		if (flags & F_PING) { /* fill the others with light bg load */
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL, NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx;
+
+			if (flags & F_ISOLATE) {
+				int clone, dmabuf = -1;
+
+				/* Carry the shared buffer over to the new fd */
+				if (common)
+					dmabuf = prime_handle_to_fd(i915, common);
+
+				clone = gem_reopen_driver(i915);
+				gem_context_copy_engines(i915, 0, clone, 0);
+				i915 = clone;
+
+				if (dmabuf != -1)
+					common = prime_fd_to_handle(i915, dmabuf);
+			}
+
+			ctx = gem_context_clone_with_engines(i915, 0);
+
+			/* Child 0 is the privileged client in VIP/RRUL modes */
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child], &iqr[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		/* Tell the clients to stop, and wait for every report */
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/*
+		 * Are we running out of CPU time, and fail to submit frames?
+		 *
+		 * We try to rule out any undue impact on the GPU scheduling
+		 * from the CPU scheduler by looking for core saturation. If
+		 * we may be in a situation where the clients + kernel are
+		 * taking a whole core (think lockdep), then it is increasingly
+		 * likely that our measurements include delays from the CPU
+		 * scheduler. Err on the side of caution.
+		 */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		igt_debug("CPU usage: %.0f%%\n", 100. * cpu_time / d_time);
+		if (4 * cpu_time > 3 * d_time) {
+			if (nchild > 7) /* good enough to judge pass/fail */
+				break;
+
+			igt_skip_on_f(4 * cpu_time > 3 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		/* With no contention, we should match our target frametime */
+		if (nchild == 1) {
+			igt_assert(4 * result[0] > 3 * fence_ns &&
+				   3 * result[0] < 4 * fence_ns);
+			continue;
+		}
+
+		/*
+		 * The VIP should always be able to hit the target frame rate;
+		 * regardless of budget contention from lesser clients.
+		 */
+		if (flags & (F_VIP | F_RRUL)) {
+			igt_info("VIP interval %.2fms, range %.2fms\n",
+				 1e-6 * result[0], 1e-6 * iqr[0]);
+			igt_assert_f(4 * result[0] > 3 * fence_ns &&
+				     3 * result[0] < 4 * fence_ns,
+				     "VIP expects to run exactly when it wants, expects an interval of %.2fms, was %.2fms\n",
+				     1e-6 * fence_ns, 1e-6 * result[0]);
+			igt_assert_f(2 * iqr[0] < result[0],
+				     "VIP frame IQR %.2fms exceeded median threshold %.2fms\n",
+				     1e-6 * iqr[0],
+				     1e-6 * result[0] / 2);
+			if (!--nchild)
+				continue;
+
+			/* Exclude the VIP result from the plebeian statistics */
+			memmove(result, result + 1, nchild * sizeof(*result));
+			memmove(iqr, iqr + 1, nchild * sizeof(*iqr));
+		}
+
+		/*
+		 * Quartile indices. These must be derived only now: nchild
+		 * shrinks when the VIP is dropped above, and indices based on
+		 * the old count would point past the sorted results.
+		 */
+		lo = nchild / 4;
+		hi = (3 * nchild + 3) / 4 - 1;
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		qsort(iqr, nchild, sizeof(*iqr), cmp_ul);
+
+		/*
+		 * The target interval for median/mean is 16ms (fence_ns).
+		 * However, this work is evenly split across the clients so
+		 * the range (and median) of client medians may be much less
+		 * than 16ms [16/3N]. We present median of medians to try
+		 * and avoid any instability while running in CI; at the cost
+		 * of insensitivity!
+		 */
+		igt_info("%3d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f [%.1f, %.1f], mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0], 1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * iqr[lo], 1e-6 * iqr[hi],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		igt_assert_f(iqr[nchild / 2] < 2 * result[nchild / 2],
+			     "Child frame IQR %.2fms exceeded median threshold %.2fms\n",
+			     1e-6 * iqr[nchild / 2],
+			     1e-6 * result[nchild / 2] * 2);
+
+		igt_assert_f(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			     3 * igt_mean_get(&m) < 4 * result[nchild / 2],
+			     "Mean of client interval %.2fms differs from median %.2fms, distribution is skewed\n",
+			     1e-6 * igt_mean_get(&m), 1e-6 * result[nchild / 2]);
+
+		igt_assert_f(2 * (result[hi] - result[lo]) < result[nchild / 2],
+			     "Interquartile range of client intervals %.2fms is as large as the median threshold %.2fms, clients are not evenly distributed!\n",
+			     1e-6 * (result[hi] - result[lo]),
+			     1e-6 * result[nchild / 2] / 2);
+
+		/* May be slowed due to sheer volume of context switches */
+		if (result[0] > 2 * fence_ns)
+			break;
+	}
+
+	munmap(iqr, 4096);
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+/*
+ * Register the "fair-*" subtests: one per pacing strategy listed in the
+ * table below, each of which runs fairness() as a dynamic subtest on every
+ * physical engine whose class can store a dword.
+ */
+static void test_fairness(int i915, int timeout)
+{
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} modes[] = {
+		/*
+		 * none - maximal greed in each client
+		 *
+		 * Every client pushes frames as fast as it possibly can.
+		 */
+		{ "none", 0 },
+		{ "none-vip", F_VIP }, /* one vip client must meet deadlines */
+		{ "none-solo", F_SOLO }, /* 1 batch per frame per client */
+		{ "none-share", F_SHARE }, /* read from a common buffer */
+		{ "none-rrul", F_RRUL }, /* "realtime-response under load" */
+		{ "none-ping", F_PING }, /* measure inter-engine fairness */
+
+		/*
+		 * throttle - original per client throttling
+		 *
+		 * Used for front buffer rendering where there is no
+		 * external frame marker. Each client tries to only keep
+		 * 20ms of work submitted, though that measurement is
+		 * flawed...
+		 *
+		 * This is used by Xorg to try and maintain some semblance
+		 * of input/output consistency when being fed a continuous
+		 * stream of X11 draw requests straight into scanout, where
+		 * the clients may submit the work faster than can be drawn.
+		 *
+		 * Throttling tracks requests per-file (and assumes that
+		 * all requests are in submission order across the whole file),
+		 * so we split each child to its own fd.
+		 */
+		{ "throttle", F_THROTTLE | F_ISOLATE },
+		{ "throttle-vip", F_THROTTLE | F_ISOLATE | F_VIP },
+		{ "throttle-solo", F_THROTTLE | F_ISOLATE | F_SOLO },
+		{ "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+		{ "throttle-rrul", F_THROTTLE | F_ISOLATE | F_RRUL },
+
+		/*
+		 * pace - mesa "submit double buffering"
+		 *
+		 * Submit a frame, then wait for the previous frame to start.
+		 * This stops a client from getting too far ahead of its
+		 * rendering and keeps its input/output latency consistent.
+		 */
+		{ "pace", F_PACE },
+		{ "pace-solo", F_PACE | F_SOLO },
+		{ "pace-share", F_PACE | F_SOLO | F_SHARE },
+		{ "pace-ping", F_PACE | F_SOLO | F_SHARE | F_PING},
+
+		/* sync - only submit a frame at a time */
+		{ "sync", F_SYNC },
+		{ "sync-vip", F_SYNC | F_VIP },
+		{ "sync-solo", F_SYNC | F_SOLO },
+
+		/* flow - synchronise execution against the clock (vblank) */
+		{ "flow", F_PACE | F_FLOW },
+		{ "flow-solo", F_PACE | F_FLOW | F_SOLO },
+		{ "flow-share", F_PACE | F_FLOW | F_SHARE },
+		{ "flow-ping", F_PACE | F_FLOW | F_SHARE | F_PING },
+
+		/* next - submit ahead of the clock (vblank double buffering) */
+		{ "next", F_PACE | F_FLOW | F_NEXT },
+		{ "next-solo", F_PACE | F_FLOW | F_NEXT | F_SOLO },
+		{ "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+		{ "next-ping", F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+		/* spare - underutilise by a single client timeslice */
+		{ "spare", F_PACE | F_FLOW | F_SPARE },
+		{ "spare-solo", F_PACE | F_FLOW | F_SPARE | F_SOLO },
+
+		/* half - run at half pace (submit 16ms of work every 32ms) */
+		{ "half", F_PACE | F_FLOW | F_HALF },
+		{ "half-solo", F_PACE | F_FLOW | F_HALF | F_SOLO },
+
+		{} /* sentinel: a NULL name terminates the table */
+	};
+
+	igt_fixture {
+		igt_info("CS timestamp frequency: %d\n",
+			 read_timestamp_frequency(i915));
+
+		igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	}
+
+	for (int i = 0; modes[i].name; i++) {
+		igt_subtest_with_dynamic_f("fair-%s", modes[i].name) {
+			const struct intel_execution_engine2 *e;
+
+			__for_each_physical_engine(i915, e) {
+				if (!gem_class_can_store_dword(i915, e->class))
+					continue;
+
+				igt_dynamic_f("%s", e->name)
+					fairness(i915, e, timeout, modes[i].flags);
+			}
+		}
+	}
+}
+
+/*
+ * Read the RUNTIME register (base + 0x3a8) as seen by context @ctx on engine
+ * @e: a small batch makes the GPU store the register into the batch object
+ * itself (SRM), and the value is read back by the CPU after the batch has
+ * completed.
+ */
+static uint32_t read_ctx_timestamp(int i915,
+				   uint32_t ctx,
+				   const struct intel_execution_engine2 *e)
+{
+	const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+	const uint32_t base = gem_engine_mmio_base(i915, e->name);
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = gem_create(i915, 4096),
+		.offset = 32 << 20,
+		.relocs_ptr = to_user_pointer(&reloc),
+		.relocation_count = 1,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+#define RUNTIME (base + 0x3a8)
+	/* Byte offset inside the batch object where the GPU stores the value */
+	const unsigned int result_offset = 4000;
+	uint32_t *page, *cmd;
+	uint32_t value;
+
+	igt_require(base);
+
+	page = gem_mmap__device_coherent(i915, obj.handle,
+					 0, 4096, PROT_WRITE);
+	cmd = page;
+
+	*cmd++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+	*cmd++ = RUNTIME;
+
+	/* The destination address that follows is patched by this reloc */
+	reloc = (struct drm_i915_gem_relocation_entry){
+		.target_handle = obj.handle,
+		.presumed_offset = obj.offset,
+		.offset = offset_in_page(cmd),
+		.delta = result_offset,
+	};
+	*cmd++ = obj.offset + result_offset;
+	*cmd++ = obj.offset >> 32;
+
+	*cmd++ = MI_BATCH_BUFFER_END;
+
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+	gem_close(i915, obj.handle);
+
+	/* Our mapping is still in place after closing the handle */
+	value = page[result_offset / sizeof(*page)];
+	munmap(page, 4096);
+
+	return value;
+}
+
+/*
+ * Submit the same spinning batch from three contexts on engine @e, let them
+ * compete for two seconds and then compare the timestamp each context
+ * reports: the spread between them must stay below a sixth of the median.
+ */
+static void fairslice(int i915,
+		      const struct intel_execution_engine2 *e,
+		      unsigned long flags)
+{
+	igt_spin_t *spin = NULL;
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	for (int n = 0; n < ARRAY_SIZE(ctx); n++) {
+		struct drm_i915_gem_execbuffer2 eb;
+
+		ctx[n] = gem_context_clone_with_engines(i915, 0);
+
+		/* The first context creates the spinner... */
+		if (spin == NULL) {
+			spin = __igt_spin_new(i915,
+					      .ctx = ctx[n],
+					      .engine = e->flags,
+					      .flags = flags);
+			continue;
+		}
+
+		/* ...and every later context resubmits that very batch. */
+		eb = (struct drm_i915_gem_execbuffer2){
+			.buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+			.buffer_count = 1,
+			.flags = e->flags,
+			.rsvd1 = ctx[n],
+		};
+		gem_execbuf(i915, &eb);
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	igt_assert(gem_bo_busy(i915, spin->handle));
+	igt_spin_end(spin);
+
+	/* Collect the timestamps before tearing the contexts down */
+	for (int n = 0; n < ARRAY_SIZE(ctx); n++)
+		ts[n] = read_ctx_timestamp(i915, ctx[n], e);
+	for (int n = 0; n < ARRAY_SIZE(ctx); n++)
+		gem_context_destroy(i915, ctx[n]);
+	igt_spin_free(i915, spin);
+
+	/* Ascending order: ts[0] is the minimum, ts[1] the median */
+	qsort(ts, ARRAY_SIZE(ts), sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[1]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert_f(ts[2], "CTX_TIMESTAMP not reported!\n");
+	igt_assert_f((ts[2] - ts[0]) * 6 < ts[1],
+		     "Range of timeslices greater than tolerable: %.2fms > %.2fms; unfair!\n",
+		     1e-6 * ticks_to_ns(i915, ts[2] - ts[0]),
+		     1e-6 * ticks_to_ns(i915, ts[1]) / 6);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2582,6 +3503,35 @@ igt_main
test_each_engine("lateslice", fd, e)
lateslice(fd, e->flags);
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_has_semaphores(fd));
+ igt_require(gem_scheduler_has_preemption(fd));
+ igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+ }
+
+ test_each_engine("fairslice", fd, e)
+ fairslice(fd, e, 0);
+
+ test_each_engine("u-fairslice", fd, e)
+ fairslice(fd, e, IGT_SPIN_USERPTR);
+
+ igt_subtest("fairslice-all") {
+ __for_each_physical_engine(fd, e) {
+ igt_fork(child, 1)
+ fairslice(fd, e, 0);
+ }
+ igt_waitchildren();
+ }
+ igt_subtest("u-fairslice-all") {
+ __for_each_physical_engine(fd, e) {
+ igt_fork(child, 1)
+ fairslice(fd, e, IGT_SPIN_USERPTR);
+ }
+ igt_waitchildren();
+ }
+ }
+
test_each_engine("submit-early-slice", fd, e)
submit_slice(fd, e, EARLY_SUBMIT);
test_each_engine("submit-golden-slice", fd, e)
@@ -2610,6 +3560,10 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ igt_subtest_group {
+ test_fairness(fd, 2);
+ }
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.29.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-11-24 23:39 Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-11-24 23:39 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 847 +++++++++++++++++++++++++++++++++
1 file changed, 847 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index f23d63ac3..d888efcd7 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2516,6 +2517,819 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+/*
+ * Query I915_PARAM_CS_TIMESTAMP_FREQUENCY, i.e. timestamp ticks per second.
+ * The ioctl result is not checked: 0 is returned unless the kernel fills in
+ * a value.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int freq = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &freq,
+	};
+
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return freq;
+}
+
+/* Divide @x by @y, rounding any remainder up to the next integer. */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	/* Bias the dividend so that the truncating division rounds up. */
+	const uint64_t biased = x + (y - 1);
+
+	return biased / y;
+}
+
+/*
+ * Convert @ns nanoseconds into context timestamp ticks (rounding up), using
+ * the reported timestamp frequency except on gen11 where a fixed 12.5MHz is
+ * substituted.
+ */
+static uint64_t ns_to_ctx_ticks(int i915, uint64_t ns)
+{
+	const int reported = read_timestamp_frequency(i915);
+	const int is_gen11 = intel_gen(intel_get_drm_devid(i915)) == 11;
+	/* icl!!! are you feeling alright? CTX vs CS */
+	const int f = is_gen11 ? 12500000 : reported;
+
+	return div64_u64_round_up(ns * f, NSEC_PER_SEC);
+}
+
+/*
+ * Convert @ticks of the command streamer timestamp into nanoseconds
+ * (rounding up), using the frequency reported by the kernel.
+ */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	const uint64_t scaled = ticks * NSEC_PER_SEC;
+	const uint64_t freq = read_timestamp_frequency(i915);
+
+	return div64_u64_round_up(scaled, freq);
+}
+
+/* Build a command dword from its opcode (bits 23 and up) and flag bits. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH header; @x is the number of MI_MATH_INSTR dwords that follow. */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand; MI_MATH_INSTR requires all three
+ * arguments, so the unused second operand is passed explicitly as 0.
+ */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x)) /* registers are 8 bytes apart; the upper dword is at +4 */
+#define RUNTIME (base + 0x3a8) /* the register the comments below call CTX_TIMESTAMP */
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ /*
+ * Fill the batch object @handle with a busy-wait that the GPU runs
+ * from address @addr: loop until CTX_TIMESTAMP - initial > @ns
+ *
+ * The running delta is spilled to byte 4000 of the batch, where the
+ * conditional batch-buffer-end at the bottom of the loop reads it.
+ */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ while (offset_in_page(cs) & 63) /* pad with zero dwords up to a 64 byte boundary */
+ *cs++ = 0;
+ jmp = cs; /* loop entry point, target of the MI_BATCH_BUFFER_START below */
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta [time elapsed] > ns */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ctx_ticks(i915, ns); /* inverted, like the stored delta */
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object holding a delay() loop of @target_ns for engine @e.
+ *
+ * The object is first executed once on @ctx with its offset set to
+ * handle << 12 and waited upon; the delay loop is then written for that
+ * address and the returned exec object is flagged EXEC_OBJECT_PINNED.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	const uint32_t handle = batch_create(i915);
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = handle,
+		.offset = handle << 12,
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, obj.handle);
+
+	/* Fill in the commands using the address the object now reports */
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x)) /* registers are 8 bytes apart; the upper dword is at +4 */
+#define CS_TIMESTAMP (base + 0x358)
+ enum { INC, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512; /* commands start at byte 2048 of the object */
+
+ /*
+ * Record the current CS_TIMESTAMP into a journal [a 512 slot ring].
+ *
+ * The journal starts at @addr, i.e. in the first half of the object
+ * @handle, ahead of the commands written here. Each execution stores
+ * one timestamp and then patches this batch so that the next
+ * execution stores into the following slot.
+ */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs; /* dword rewritten by the GPU with the next slot address */
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* Load the address + inc & mask variables */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs; /* likewise rewritten, so ADDR is reloaded with the current slot */
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC);
+ *cs++ = 4; /* one dword per journal slot */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff; /* clears bit 11: wraps after 2048 bytes, assuming @addr is 4096 byte aligned */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* Increment the [ring] address for saving CS_TIMESTAMP */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Rewrite the batch buffer for the next execution */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create the timestamp journal object for engine @e (see tslog()).
+ *
+ * The object is first executed once on @ctx with its offset set to
+ * handle << 12 and waited upon; the journal commands are then written for
+ * that address and the returned exec object is flagged EXEC_OBJECT_PINNED.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	const uint32_t handle = batch_create(i915);
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = handle,
+		.offset = handle << 12,
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.flags = e->flags,
+		.rsvd1 = ctx,
+	};
+
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, obj.handle);
+
+	/* Fill in the commands using the address the object now reports */
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+/* qsort() comparator: orders two uint32_t values in ascending order. */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t lhs = *(const uint32_t *)A;
+	const uint32_t rhs = *(const uint32_t *)B;
+
+	/* Yields -1, 0 or +1; a plain subtraction could wrap around. */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Report whether the context timestamp can be used on engine @e: it is
+ * treated as unusable ("looks fubar") on the gen8 video engines only.
+ */
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+	const int gen = intel_gen(intel_get_drm_devid(i915));
+	const bool gen8_video = gen == 8 && e->class == I915_ENGINE_CLASS_VIDEO;
+
+	return !gen8_video;
+}
+
+/* An engine is a candidate if it is not @not and has mutable submission. */
+static int engine_is_candidate(int i915,
+			       const struct intel_execution_engine2 *e,
+			       const struct intel_execution_engine2 *not)
+{
+	return e->flags != not->flags &&
+	       gem_class_has_mutable_submission(i915, e->class);
+}
+
+/*
+ * Pick, at random, a physical engine other than @not whose class has mutable
+ * submission. If there is no such engine, a copy of @not is returned.
+ */
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int candidates = 0;
+	unsigned int skip;
+
+	/* First pass: count how many engines we may choose from */
+	__for_each_physical_engine(i915, e) {
+		if (engine_is_candidate(i915, e, not))
+			candidates++;
+	}
+	if (candidates == 0)
+		return *not;
+
+	/* Second pass: stop at the randomly selected candidate */
+	skip = rand() % candidates;
+	__for_each_physical_engine(i915, e) {
+		if (!engine_is_candidate(i915, e, not))
+			continue;
+		if (!skip--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns, /* GPU delay per frame, divided across the batches of a frame */
+ int timeline, /* sw_sync timeline that F_FLOW in-fences are created on */
+ uint32_t common, /* handle used for obj[1]; if 0 a private buffer is created */
+ unsigned int flags, /* F_* bits defined below */
+ unsigned long *ctl, /* frames are submitted until this reads non-zero */
+ unsigned long *out) /* if not NULL, receives the median frame interval in ns */
+#define F_SYNC (1 << 0) /* wait for the frame just submitted before continuing */
+#define F_PACE (1 << 1) /* wait for the previous frame's fence before continuing */
+#define F_FLOW (1 << 2) /* gate each frame on a fence from @timeline */
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4) /* one delay batch per frame instead of three */
+#define F_SPARE (1 << 5)
+#define F_NEXT (1 << 6) /* with F_FLOW: wait on the following timeline point */
+#define F_VIP (1 << 7)
+#define F_RRUL (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING (1 << 10) /* record the timestamps on a randomly picked other engine */
+#define F_THROTTLE (1 << 11) /* call DRM_IOCTL_I915_GEM_THROTTLE after each frame */
+#define F_ISOLATE (1 << 12)
+{ /* One client: submit frames in a loop until *ctl is set, then report through @out. */
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {}, /* replaced by the timestamp journal below */
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1; /* out-fences of the previous and of the newest frame */
+ unsigned long count = 0; /* frames submitted so far */
+ int n;
+
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 4,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32; /* the out-fence fd comes back in the upper 32 bits */
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++) /* remaining batches of this frame, without fences */
+ gem_execbuf(i915, &execbuf);
+ close(execbuf.rsvd2); /* presumably the low 32 bits: the F_FLOW in-fence, or -1 */
+
+ execbuf.buffer_count = 1; /* only obj[0], the timestamp journal */
+ execbuf.batch_start_offset = 2048; /* tslog() writes its commands from dword 512 */
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence; /* log the timestamp once the frame's first batch signals */
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ if (flags & F_THROTTLE)
+ igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+ igt_swap(obj[2], obj[3]); /* alternate the order of the two delay batches */
+ igt_swap(p_fence, n_fence); /* the newest fence becomes the previous one */
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+
+ gem_sync(i915, obj[0].handle);
+ if (out) {
+ uint32_t *map;
+
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_WRITE);
+ for (n = 1; n < min(count, 512); n++) { /* turn the timestamps into intervals, in place */
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ qsort(map, --n, sizeof(*map), cmp_u32); /* n is now the number of intervals */
+ *out = ticks_to_ns(i915, map[n / 2]); /* the median interval */
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+/* qsort() comparator giving ascending order for unsigned long values. */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long *pa = A;
+	const unsigned long *pb = B;
+
+	if (*pa == *pb)
+		return 0;
+
+	return *pa < *pb ? -1 : 1;
+}
+
+/*
+ * CPU time consumed between the rusage snapshot @b and the later snapshot
+ * @a, in nanoseconds, counting both user and system time.
+ */
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	uint64_t user_ns = 0, system_ns = 0;
+
+	/* Time spent in userspace */
+	user_ns += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+	user_ns += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+	/* Time spent in the kernel */
+	system_ns += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+	system_ns += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+	return user_ns + system_ns;
+}
+
+/*
+ * Sleep for @delay_ns nanoseconds and then advance the sw_sync @timeline by
+ * one step.
+ */
+static void timeline_advance(int timeline, int delay_ns)
+{
+	const int ns_per_sec = 1000 * 1000 * 1000;
+	/*
+	 * nanosleep() rejects tv_nsec >= 1s with EINVAL (and does not sleep),
+	 * so carry whole seconds over into tv_sec.
+	 */
+	struct timespec tv = {
+		.tv_sec = delay_ns / ns_per_sec,
+		.tv_nsec = delay_ns % ns_per_sec,
+	};
+
+	nanosleep(&tv, NULL);
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+/*
+ * fairness - check that equally loaded clients get an equal share of the GPU
+ * @i915: open drm fd
+ * @e: engine on which the competing clients run
+ * @timeout: length of each measurement pass, in seconds
+ * @flags: F_* flags selecting how the clients pace their frames
+ *
+ * Runs passes with 1, 3, 7, ... (up to 63) forked clients. Each client
+ * reports its median frame interval, and the distribution of those medians
+ * is checked against the target interval.
+ */
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+	uint32_t common = 0;
+
+	igt_require(has_ctx_timestamp(i915, e));
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+	if (flags & F_SHARE)
+		common = gem_create(i915, 4095);
+
+	/*
+	 * Page shared with the forked clients: result[child] receives each
+	 * client's median, result[nchild] is the stop flag. The parent reads
+	 * it back, so map it readable as well, and do not carry on with
+	 * MAP_FAILED if the mapping could not be created.
+	 */
+	result = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(result != MAP_FAILED);
+
+	for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		struct rusage old_usage, usage;
+		uint64_t cpu_time, d_time;
+		unsigned long vip = -1;
+		struct timespec tv;
+		struct igt_mean m;
+
+		if (flags & F_PING) { /* light load on every other engine */
+			struct intel_execution_engine2 *ping;
+
+			__for_each_physical_engine(i915, ping) {
+				if (ping->flags == e->flags)
+					continue;
+
+				igt_fork(child, 1) {
+					uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+					fair_child(i915, ctx, ping,
+						   child_ns / 8,
+						   -1, common,
+						   F_SOLO | F_PACE | F_SHARE,
+						   &result[nchild],
+						   NULL);
+
+					gem_context_destroy(i915, ctx);
+				}
+			}
+		}
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		getrusage(RUSAGE_CHILDREN, &old_usage);
+		igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+		igt_fork(child, nchild) {
+			uint32_t ctx;
+
+			if (flags & F_ISOLATE) {
+				int clone, dmabuf = -1;
+
+				/* Carry the shared buffer over to the new fd */
+				if (common)
+					dmabuf = prime_handle_to_fd(i915, common);
+
+				clone = gem_reopen_driver(i915);
+				gem_context_copy_engines(i915, 0, clone, 0);
+				i915 = clone;
+
+				if (dmabuf != -1)
+					common = prime_fd_to_handle(i915, dmabuf);
+			}
+
+			ctx = gem_context_clone_with_engines(i915, 0);
+
+			/* Child 0 is the privileged client in VIP/RRUL modes */
+			if (flags & F_VIP && child == 0) {
+				gem_context_set_priority(i915, ctx, MAX_PRIO);
+				flags |= F_FLOW;
+			}
+			if (flags & F_RRUL && child == 0)
+				flags |= F_SOLO | F_FLOW | F_SYNC;
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeline, common, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		while (nfences--)
+			timeline_advance(timeline, fence_ns);
+
+		/* Tell the clients to stop, and wait for every report */
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child]))
+				timeline_advance(timeline, fence_ns);
+		}
+
+		igt_waitchildren();
+		close(timeline);
+
+		/* Are we running out of CPU time, and fail to submit frames? */
+		d_time = igt_nsec_elapsed(&tv);
+		getrusage(RUSAGE_CHILDREN, &usage);
+		cpu_time = d_cpu_time(&usage, &old_usage);
+		if (10 * cpu_time > 9 * d_time) {
+			if (nchild > 7)
+				break;
+
+			igt_skip_on_f(10 * cpu_time > 9 * d_time,
+				      "%.0f%% CPU usage, presuming capacity exceeded\n",
+				      100. * cpu_time / d_time);
+		}
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		/* Remember child 0's result before sorting loses its position */
+		if (flags & (F_VIP | F_RRUL))
+			vip = result[0];
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%2d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0], 1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+		if (vip != -1) {
+			igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+			igt_assert(4 * vip > 3 * fence_ns &&
+				   3 * vip < 4 * fence_ns);
+		}
+
+		/* May be slowed due to sheer volume of context switches */
+		igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+			   igt_mean_get(&m) < 3 * fence_ns);
+
+		/* Mean and median should agree to within about a quarter */
+		igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+			   3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+		/* The interquartile range must stay below half the median */
+		igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+	}
+
+	munmap(result, 4096);
+	if (common)
+		gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } fair[] = {
+ /*
+ * none - maximal greed in each client
+ *
+ * Push as many frames from each client as fast as possible
+ */
+ { "none", 0 },
+ { "none-vip", F_VIP }, /* one vip client must meet deadlines */
+ { "none-solo", F_SOLO }, /* 1 batch per frame per client */
+ { "none-share", F_SHARE }, /* read from a common buffer */
+ { "none-rrul", F_RRUL }, /* "realtime-response under load" */
+ { "none-ping", F_PING }, /* measure inter-engine fairness */
+
+ /*
+ * throttle - original per client throttling
+ *
+ * Used for front buffering rendering where there is no
+ * external frame marker. Each client tries to only keep
+ * 20ms of work submitted, though that measurement is
+ * flawed...
+ *
+ * This is used by Xorg to try and maintain some resemblance
+ * of input/output consistency when being fed a continuous
+ * stream of X11 draw requests straight into scanout, where
+ * the clients may submit the work faster than can be drawn.
+ *
+ * Throttling tracks requests per-file (and assumes that
+ * all requests are in submission order across the whole file),
+ * so we split each child to its own fd.
+ */
+ { "throttle", F_THROTTLE | F_ISOLATE },
+ { "throttle-vip", F_THROTTLE | F_ISOLATE | F_VIP },
+ { "throttle-solo", F_THROTTLE | F_ISOLATE | F_SOLO },
+ { "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+ { "throttle-rrul", F_THROTTLE | F_ISOLATE | F_RRUL },
+
+ /*
+ * pace - mesa "submit double buffering"
+ *
+ * Submit a frame, wait for previous frame to start. This
+ * prevents each client from getting too far ahead of its
+ * rendering, maintaining a consistent input/output latency.
+ */
+ { "pace", F_PACE },
+ { "pace-solo", F_PACE | F_SOLO},
+ { "pace-share", F_PACE | F_SHARE},
+ { "pace-ping", F_PACE | F_SHARE | F_PING},
+
+ /* sync - only submit a frame at a time */
+ { "sync", F_SYNC },
+ { "sync-vip", F_SYNC | F_VIP },
+ { "sync-solo", F_SYNC | F_SOLO },
+
+ /* flow - synchronise execution against the clock (vblank) */
+ { "flow", F_PACE | F_FLOW },
+ { "flow-share", F_PACE | F_FLOW | F_SHARE },
+ { "flow-ping", F_PACE | F_FLOW | F_SHARE | F_PING },
+
+ /* next - submit ahead of the clock (vblank double buffering) */
+ { "next", F_PACE | F_FLOW | F_NEXT },
+ { "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+ { "next-ping", F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+ /* spare - underutilise by a single client timeslice */
+ { "spare", F_PACE | F_FLOW | F_SPARE },
+
+ /* half - run at half pace (submit 16ms of work every 32ms) */
+ { "half", F_PACE | F_FLOW | F_HALF },
+
+ {}
+ };
+
+ igt_fixture {
+ igt_info("CS timestamp frequency: %d\n",
+ read_timestamp_frequency(i915));
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ }
+
+ for (typeof(*fair) *f = fair; f->name; f++) {
+ igt_subtest_with_dynamic_f("fair-%s", f->name) {
+ const struct intel_execution_engine2 *e;
+
+ __for_each_physical_engine(i915, e) {
+ if (!gem_class_can_store_dword(i915, e->class))
+ continue;
+
+ igt_dynamic_f("%s", e->name)
+ fairness(i915, e, timeout, f->flags);
+ }
+ }
+ }
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(i915, 4096),
+ .offset = 32 << 20,
+ .relocs_ptr = to_user_pointer(&reloc),
+ .relocation_count = 1,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = e->flags,
+ .rsvd1 = ctx,
+ };
+#define RUNTIME (base + 0x3a8)
+ uint32_t *map, *cs;
+ uint32_t ts;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, obj.handle,
+ 0, 4096, PROT_WRITE);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = RUNTIME;
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj.handle;
+ reloc.presumed_offset = obj.offset;
+ reloc.offset = offset_in_page(cs);
+ reloc.delta = 4000;
+ *cs++ = obj.offset + 4000;
+ *cs++ = obj.offset >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+ gem_close(i915, obj.handle);
+
+ ts = map[1000];
+ munmap(map, 4096);
+
+ return ts;
+}
+
+static void fairslice(int i915,
+ const struct intel_execution_engine2 *e,
+ unsigned long flags)
+{
+ igt_spin_t *spin = NULL;
+ uint32_t ctx[3];
+ uint32_t ts[3];
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ ctx[i] = gem_context_clone_with_engines(i915, 0);
+ if (spin == NULL) {
+ spin = __igt_spin_new(i915,
+ .ctx = ctx[i],
+ .engine = e->flags,
+ .flags = flags);
+ } else {
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
+ .flags = e->flags,
+ .rsvd1 = ctx[i],
+ };
+ gem_execbuf(i915, &eb);
+ }
+ }
+
+ sleep(2); /* over the course of many timeslices */
+
+ igt_assert(gem_bo_busy(i915, spin->handle));
+ igt_spin_end(spin);
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ gem_context_destroy(i915, ctx[i]);
+ igt_spin_free(i915, spin);
+
+ qsort(ts, 3, sizeof(*ts), cmp_u32);
+ igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+ 1e-6 * ticks_to_ns(i915, ts[0]),
+ 1e-6 * ticks_to_ns(i915, ts[2]));
+
+ igt_assert(ts[0] && ts[2] > ts[0]);
+ igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2582,6 +3396,35 @@ igt_main
test_each_engine("lateslice", fd, e)
lateslice(fd, e->flags);
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_has_semaphores(fd));
+ igt_require(gem_scheduler_has_preemption(fd));
+ igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+ }
+
+ test_each_engine("fairslice", fd, e)
+ fairslice(fd, e, 0);
+
+ test_each_engine("u-fairslice", fd, e)
+ fairslice(fd, e, IGT_SPIN_USERPTR);
+
+ igt_subtest("fairslice-all") {
+ __for_each_physical_engine(fd, e) {
+ igt_fork(child, 1)
+ fairslice(fd, e, 0);
+ }
+ igt_waitchildren();
+ }
+ igt_subtest("u-fairslice-all") {
+ __for_each_physical_engine(fd, e) {
+ igt_fork(child, 1)
+ fairslice(fd, e, IGT_SPIN_USERPTR);
+ }
+ igt_waitchildren();
+ }
+ }
+
test_each_engine("submit-early-slice", fd, e)
submit_slice(fd, e, EARLY_SUBMIT);
test_each_engine("submit-golden-slice", fd, e)
@@ -2610,6 +3453,10 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ igt_subtest_group {
+ test_fairness(fd, 2);
+ }
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.29.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-08-03 13:57 Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-08-03 13:57 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 816 +++++++++++++++++++++++++++++++++
1 file changed, 816 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 488d93511..7c8ea6d70 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2503,6 +2504,800 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915)
+{
+ int value = 0;
+ drm_i915_getparam_t gp = {
+ .value = &value,
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+ return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+ return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+ return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+ NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+ return div64_u64_round_up(ticks * NSEC_PER_SEC,
+ read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/* LOAD0/LOAD1 take a single operand; the unused second operand field is zero. */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ /* Loop until CTX_TIMESTAMP - initial > @ns */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ while (offset_in_page(cs) & 63)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta > ns */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * delay_create - build a GPU delay batch for @ctx on engine @e
+ *
+ * Allocates a batch with batch_create(), submits it once on @ctx/@e and
+ * waits for it with gem_sync(), then lets delay() rewrite the object with
+ * a loop that spins until the context timestamp has advanced by
+ * @target_ns. The GPU address handed to delay() is derived from the
+ * handle (handle << 12); presumably this gives every object its own
+ * page-sized slot in the address space (TODO confirm).
+ *
+ * Returns the exec object with EXEC_OBJECT_PINNED set, so that later
+ * execbufs keep it at the address that delay() baked into the batch.
+ * The caller owns the handle and is expected to gem_close() it.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * Widen before shifting: the handle is only 32 bits wide, so
+	 * shifting it in place would wrap for handles >= 1 << 20 before
+	 * the result ever reaches the 64-bit offset.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	/* The object is idle now, so it is safe to rewrite its contents */
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { INC, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* Load the address + inc & mask variables */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* Increment the [ring] address for saving CS_TIMESTAMP */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Rewrite the batch buffer for the next execution */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * tslog_create - build a timestamp-logging batch for @ctx on engine @e
+ *
+ * Allocates a batch with batch_create(), submits it once on @ctx/@e and
+ * waits for it with gem_sync(), then lets tslog() fill the object: the
+ * batch is written at dword 512 (byte offset 2048) and, each time it is
+ * executed, stores CS_TIMESTAMP into a ring of slots at the start of the
+ * same object. The GPU address handed to tslog() is derived from the
+ * handle (handle << 12).
+ *
+ * Returns the exec object with EXEC_OBJECT_PINNED set, so that later
+ * execbufs keep it at the address that tslog() baked into the batch.
+ * The caller owns the handle and is expected to gem_close() it.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * Widen before shifting: the handle is only 32 bits wide, so
+	 * shifting it in place would wrap for handles >= 1 << 20 before
+	 * the result ever reaches the 64-bit offset.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	/* The object is idle now, so it is safe to rewrite its contents */
+	tslog(i915, e, obj.handle, obj.offset);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const uint32_t *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static bool has_ctx_timestamp(int i915, const struct intel_execution_engine2 *e)
+{
+ const int gen = intel_gen(intel_get_drm_devid(i915));
+
+ if (gen == 8 && e->class == I915_ENGINE_CLASS_VIDEO)
+ return false; /* looks fubar */
+
+ return true;
+}
+
+/*
+ * pick_random_engine - choose a physical engine other than @not
+ *
+ * Counts the physical engines whose flags differ from @not and whose
+ * class has mutable submission, then returns a copy of one of them,
+ * selected by random() modulo the number of candidates. If there is no
+ * such engine, a copy of @not is returned instead.
+ *
+ * random() is used rather than rand() because the caller seeds the
+ * generator with srandom(); only random() is specified to consume that
+ * seed. Sharing state between rand() and srandom() is a libc
+ * implementation detail, and without it every forked child would make
+ * the same "random" pick.
+ */
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+	const struct intel_execution_engine2 *e;
+	unsigned int count = 0;
+
+	/* First pass: how many candidates are there? */
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		count++;
+	}
+	if (!count)
+		return *not;
+
+	/* Second pass: stop on the selected candidate */
+	count = random() % count;
+	__for_each_physical_engine(i915, e) {
+		if (e->flags == not->flags)
+			continue;
+		if (!gem_class_has_mutable_submission(i915, e->class))
+			continue;
+		if (!count--)
+			break;
+	}
+
+	return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeline,
+ uint32_t common,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+#define F_SPARE (1 << 5)
+#define F_NEXT (1 << 6)
+#define F_VIP (1 << 7)
+#define F_RRUL (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING (1 << 10)
+#define F_THROTTLE (1 << 11)
+#define F_ISOLATE (1 << 12)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {},
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ int n;
+
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 4,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+ close(execbuf.rsvd2);
+
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 2048;
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ if (flags & F_THROTTLE)
+ igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+ igt_swap(obj[2], obj[3]);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+
+ gem_sync(i915, obj[0].handle);
+ if (out) {
+ uint32_t *map;
+
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_WRITE);
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+ const unsigned long *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+ uint64_t cpu_time = 0;
+
+ cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+ cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+ cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+ cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+ return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+ struct timespec tv = { .tv_nsec = delay_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result;
+ uint32_t common = 0;
+
+ igt_require(has_ctx_timestamp(i915, e));
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ if (flags & F_SHARE)
+ common = gem_create(i915, 4095);
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+ for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+ int timeline = sw_sync_timeline_create();
+ int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+ const int nchild = n - 1; /* odd for easy medians */
+ const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+ const int lo = nchild / 4;
+ const int hi = (3 * nchild + 3) / 4 - 1;
+ struct rusage old_usage, usage;
+ uint64_t cpu_time, d_time;
+ unsigned long vip = -1;
+ struct timespec tv;
+ struct igt_mean m;
+
+ if (flags & F_PING) {
+ struct intel_execution_engine2 *ping;
+
+ __for_each_physical_engine(i915, ping) {
+ if (ping->flags == e->flags)
+ continue;
+
+ igt_fork(child, 1) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, ping,
+ child_ns / 8,
+ -1, common,
+ F_SOLO | F_PACE | F_SHARE,
+ &result[nchild],
+ NULL);
+
+ gem_context_destroy(i915, ctx);
+ }
+ }
+ }
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0]));
+ getrusage(RUSAGE_CHILDREN, &old_usage);
+ igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+ igt_fork(child, nchild) {
+ uint32_t ctx;
+
+ if (flags & F_ISOLATE) {
+ int clone, dmabuf = -1;
+
+ if (common)
+ dmabuf = prime_handle_to_fd(i915, common);
+
+ clone = gem_reopen_driver(i915);
+ gem_context_copy_engines(i915, 0, clone, 0);
+ i915 = clone;
+
+ if (dmabuf != -1)
+ common = prime_fd_to_handle(i915, dmabuf);
+ }
+
+ ctx = gem_context_clone_with_engines(i915, 0);
+
+ if (flags & F_VIP && child == 0) {
+ gem_context_set_priority(i915, ctx, MAX_PRIO);
+ flags |= F_FLOW;
+ }
+ if (flags & F_RRUL && child == 0)
+ flags |= F_SOLO | F_FLOW | F_SYNC;
+
+ fair_child(i915, ctx, e, child_ns,
+ timeline, common, flags,
+ &result[nchild],
+ &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ while (nfences--)
+ timeline_advance(timeline, fence_ns);
+
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child]))
+ timeline_advance(timeline, fence_ns);
+ }
+
+ igt_waitchildren();
+ close(timeline);
+
+ /* Are we running out of CPU time, and fail to submit frames? */
+ d_time = igt_nsec_elapsed(&tv);
+ getrusage(RUSAGE_CHILDREN, &usage);
+ cpu_time = d_cpu_time(&usage, &old_usage);
+ if (10 * cpu_time > 9 * d_time) {
+ if (nchild > 7)
+ break;
+
+ igt_skip_on_f(10 * cpu_time > 9 * d_time,
+ "%.0f%% CPU usage, presuming capacity exceeded\n",
+ 100. * cpu_time / d_time);
+ }
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ if (flags & (F_VIP | F_RRUL))
+ vip = result[0];
+
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ igt_info("%2d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+ if (vip != -1) {
+ igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+ igt_assert(4 * vip > 3 * fence_ns &&
+ 3 * vip < 4 * fence_ns);
+ }
+
+ /* May be slowed due to sheer volume of context switches */
+ igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+ igt_mean_get(&m) < 3 * fence_ns);
+
+ igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+ 3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+ igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+ }
+
+ munmap(result, 4096);
+ if (common)
+ gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } fair[] = {
+ /*
+ * none - maximal greed in each client
+ *
+ * Push as many frames from each client as fast as possible
+ */
+ { "none", 0 },
+ { "none-vip", F_VIP }, /* one vip client must meet deadlines */
+ { "none-solo", F_SOLO }, /* 1 batch per frame per client */
+ { "none-share", F_SHARE }, /* read from a common buffer */
+ { "none-rrul", F_RRUL }, /* "realtime-response under load" */
+ { "none-ping", F_PING }, /* measure inter-engine fairness */
+
+ /*
+ * throttle - original per client throttling
+ *
+ * Used for front buffering rendering where there is no
+ * external frame marker. Each client tries to only keep
+ * 20ms of work submitted, though that measurement is
+ * flawed...
+ *
+ * This is used by Xorg to try and maintain some resemblance
+ * of input/output consistency when being fed a continuous
+ * stream of X11 draw requests straight into scanout, where
+ * the clients may submit the work faster than can be drawn.
+ *
+ * Throttling tracks requests per-file (and assumes that
+ * all requests are in submission order across the whole file),
+ * so we split each child to its own fd.
+ */
+ { "throttle", F_THROTTLE | F_ISOLATE },
+ { "throttle-vip", F_THROTTLE | F_ISOLATE | F_VIP },
+ { "throttle-solo", F_THROTTLE | F_ISOLATE | F_SOLO },
+ { "throttle-share", F_THROTTLE | F_ISOLATE | F_SHARE },
+ { "throttle-rrul", F_THROTTLE | F_ISOLATE | F_RRUL },
+
+ /*
+ * pace - mesa "submit double buffering"
+ *
+ * Submit a frame, wait for previous frame to start. This
+ * prevents each client from getting too far ahead of its
+ * rendering, maintaining a consistent input/output latency.
+ */
+ { "pace", F_PACE },
+ { "pace-solo", F_PACE | F_SOLO},
+ { "pace-share", F_PACE | F_SHARE},
+ { "pace-ping", F_PACE | F_SHARE | F_PING},
+
+ /* sync - only submit a frame at a time */
+ { "sync", F_SYNC },
+ { "sync-vip", F_SYNC | F_VIP },
+ { "sync-solo", F_SYNC | F_SOLO },
+
+ /* flow - synchronise execution against the clock (vblank) */
+ { "flow", F_PACE | F_FLOW },
+ { "flow-share", F_PACE | F_FLOW | F_SHARE },
+ { "flow-ping", F_PACE | F_FLOW | F_SHARE | F_PING },
+
+ /* next - submit ahead of the clock (vblank double buffering) */
+ { "next", F_PACE | F_FLOW | F_NEXT },
+ { "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+ { "next-ping", F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+ /* spare - underutilise by a single client timeslice */
+ { "spare", F_PACE | F_FLOW | F_SPARE },
+
+ /* half - run at half pace (submit 16ms of work every 32ms) */
+ { "half", F_PACE | F_FLOW | F_HALF },
+
+ {}
+ };
+
+ for (typeof(*fair) *f = fair; f->name; f++) {
+ igt_subtest_with_dynamic_f("fair-%s", f->name) {
+ const struct intel_execution_engine2 *e;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+ __for_each_physical_engine(i915, e) {
+ if (!gem_class_can_store_dword(i915, e->class))
+ continue;
+
+ igt_dynamic_f("%s", e->name)
+ fairness(i915, e, timeout, f->flags);
+ }
+ }
+ }
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(i915, 4096),
+ .offset = 32 << 20,
+ .relocs_ptr = to_user_pointer(&reloc),
+ .relocation_count = 1,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = e->flags,
+ .rsvd1 = ctx,
+ };
+#define RUNTIME (base + 0x3a8)
+ uint32_t *map, *cs;
+ uint32_t ts;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, obj.handle,
+ 0, 4096, PROT_WRITE);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = RUNTIME;
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj.handle;
+ reloc.presumed_offset = obj.offset;
+ reloc.offset = offset_in_page(cs);
+ reloc.delta = 4000;
+ *cs++ = obj.offset + 4000;
+ *cs++ = obj.offset >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+ gem_close(i915, obj.handle);
+
+ ts = map[1000];
+ munmap(map, 4096);
+
+ return ts;
+}
+
+static void fairslice(int i915, const struct intel_execution_engine2 *e)
+{
+ igt_spin_t *spin[3];
+ uint32_t ctx[3];
+ uint32_t ts[3];
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ ctx[i] = gem_context_clone_with_engines(i915, 0);
+ spin[i] = igt_spin_new(i915, .ctx = ctx[i], .engine = e->flags);
+ }
+
+ sleep(2); /* over the course of many timeslices */
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ igt_assert(gem_bo_busy(i915, spin[i]->handle));
+ igt_spin_end(spin[i]);
+
+ ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ igt_spin_free(i915, spin[i]);
+ gem_context_destroy(i915, ctx[i]);
+ }
+
+ qsort(ts, 3, sizeof(*ts), cmp_u32);
+ igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+ 1e-6 * ticks_to_ns(i915, ts[0]),
+ 1e-6 * ticks_to_ns(i915, ts[2]));
+
+ igt_assert(ts[0] && ts[2] > ts[0]);
+ igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2569,6 +3364,25 @@ igt_main
test_each_engine("lateslice", fd, e)
lateslice(fd, e->flags);
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_has_semaphores(fd));
+ igt_require(gem_scheduler_has_preemption(fd));
+ igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+ }
+
+ test_each_engine("fairslice", fd, e)
+ fairslice(fd, e);
+
+ igt_subtest("fairslice-all") {
+ __for_each_physical_engine(fd, e) {
+ igt_fork(child, 1)
+ fairslice(fd, e);
+ }
+ igt_waitchildren();
+ }
+ }
+
test_each_engine("submit-early-slice", fd, e)
submit_slice(fd, e, EARLY_SUBMIT);
test_each_engine("submit-golden-slice", fd, e)
@@ -2597,6 +3411,8 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_fairness(fd, 2);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.28.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-22 19:08 Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-22 19:08 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), does an equal
amount of work in each client, and asserts that they take an equal
amount of time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 782 +++++++++++++++++++++++++++++++++
1 file changed, 782 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 931b1245f..fae04536c 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2501,6 +2502,766 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915)
+{
+ int value = 0;
+ drm_i915_getparam_t gp = {
+ .value = &value,
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+ return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+ return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+ return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+ NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+ return div64_u64_round_up(ticks * NSEC_PER_SEC,
+ read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ /* Loop until CTX_TIMESTAMP - initial > @ns */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ while (offset_in_page(cs) & 63)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta > ns */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t target_ns)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+
+ delay(i915, e, obj.handle, obj.offset, target_ns);
+
+ obj.flags |= EXEC_OBJECT_PINNED;
+ return obj;
+}
+
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { INC, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Record the current CS_TIMESTAMP into a journal [a 512 slot ring]. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* Load the address + inc & mask variables */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(INC) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* Increment the [ring] address for saving CS_TIMESTAMP */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(INC));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Rewrite the batch buffer for the next execution */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+
+ tslog(i915, e, obj.handle, obj.offset);
+
+ obj.flags |= EXEC_OBJECT_PINNED;
+ return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const uint32_t *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+ const struct intel_execution_engine2 *e;
+ unsigned int count = 0;
+
+ __for_each_physical_engine(i915, e) {
+ if (e->flags == not->flags)
+ continue;
+ if (!gem_class_has_mutable_submission(i915, e->class))
+ continue;
+ count++;
+ }
+ if (!count)
+ return *not;
+
+ count = rand() % count;
+ __for_each_physical_engine(i915, e) {
+ if (e->flags == not->flags)
+ continue;
+ if (!gem_class_has_mutable_submission(i915, e->class))
+ continue;
+ if (!count--)
+ break;
+ }
+
+ return *e;
+}
+
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeline,
+ uint32_t common,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+#define F_SPARE (1 << 5)
+#define F_NEXT (1 << 6)
+#define F_VIP (1 << 7)
+#define F_RRUL (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING (1 << 10)
+#define F_THROTTLE (1 << 11)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {},
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ int n;
+
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 4,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+ close(execbuf.rsvd2);
+
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 2048;
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ if (flags & F_THROTTLE)
+ igt_ioctl(i915, DRM_IOCTL_I915_GEM_THROTTLE, 0);
+
+ igt_swap(obj[2], obj[3]);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+
+ gem_sync(i915, obj[0].handle);
+ if (out) {
+ uint32_t *map;
+
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_WRITE);
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+static int cmp_ul(const void *A, const void *B)
+{
+ const unsigned long *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+ uint64_t cpu_time = 0;
+
+ cpu_time += (a->ru_utime.tv_sec - b->ru_utime.tv_sec) * NSEC_PER_SEC;
+ cpu_time += (a->ru_utime.tv_usec - b->ru_utime.tv_usec) * 1000;
+
+ cpu_time += (a->ru_stime.tv_sec - b->ru_stime.tv_sec) * NSEC_PER_SEC;
+ cpu_time += (a->ru_stime.tv_usec - b->ru_stime.tv_usec) * 1000;
+
+ return cpu_time;
+}
+
+static void timeline_advance(int timeline, int delay_ns)
+{
+ struct timespec tv = { .tv_nsec = delay_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+}
+
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result;
+ uint32_t common = 0;
+
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ if (flags & F_SHARE)
+ common = gem_create(i915, 4095);
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+ for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+ int timeline = sw_sync_timeline_create();
+ int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+ const int nchild = n - 1; /* odd for easy medians */
+ const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+ const int lo = nchild / 4;
+ const int hi = (3 * nchild + 3) / 4 - 1;
+ struct rusage old_usage, usage;
+ uint64_t cpu_time, d_time;
+ unsigned long vip = -1;
+ struct timespec tv;
+ struct igt_mean m;
+
+ if (flags & F_PING) {
+ struct intel_execution_engine2 *ping;
+
+ __for_each_physical_engine(i915, ping) {
+ if (ping->flags == e->flags)
+ continue;
+
+ igt_fork(child, 1) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, ping,
+ child_ns / 8,
+ -1, common,
+ F_SOLO | F_PACE | F_SHARE,
+ &result[nchild],
+ NULL);
+
+ gem_context_destroy(i915, ctx);
+ }
+ }
+ }
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0]));
+ getrusage(RUSAGE_CHILDREN, &old_usage);
+ igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ if (flags & F_VIP && child == 0) {
+ gem_context_set_priority(i915, ctx, MAX_PRIO);
+ flags |= F_FLOW;
+ }
+ if (flags & F_RRUL && child == 0)
+ flags |= F_SOLO | F_FLOW | F_SYNC;
+
+ fair_child(i915, ctx, e, child_ns,
+ timeline, common, flags,
+ &result[nchild],
+ &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ while (nfences--)
+ timeline_advance(timeline, fence_ns);
+
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child]))
+ timeline_advance(timeline, fence_ns);
+ }
+
+ igt_waitchildren();
+ close(timeline);
+
+ /* Are we running out of CPU time, and fail to submit frames? */
+ d_time = igt_nsec_elapsed(&tv);
+ getrusage(RUSAGE_CHILDREN, &usage);
+ cpu_time = d_cpu_time(&usage, &old_usage);
+ if (10 * cpu_time > 9 * d_time) {
+ if (nchild > 7)
+ break;
+
+ igt_skip_on_f(10 * cpu_time > 9 * d_time,
+ "%.0f%% CPU usage, presuming capacity exceeded\n",
+ 100. * cpu_time / d_time);
+ }
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ if (flags & (F_VIP | F_RRUL))
+ vip = result[0];
+
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ igt_info("%2d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+ if (vip != -1) {
+ igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+ igt_assert(4 * vip > 3 * fence_ns &&
+ 3 * vip < 4 * fence_ns);
+ }
+
+ /* May be slowed due to sheer volume of context switches */
+ igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+ igt_mean_get(&m) < 3 * fence_ns);
+
+ igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+ 3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+ igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+ }
+
+ munmap(result, 4096);
+ if (common)
+ gem_close(i915, common);
+}
+
+static void test_fairness(int i915, int timeout)
+{
+ static const struct {
+ const char *name;
+ unsigned int flags;
+ } fair[] = {
+ /*
+ * none - maximal greed in each client
+ *
+ * Push as many frames from each client as fast as possible
+ */
+ { "none", 0 },
+ { "none-vip", F_VIP }, /* one vip client must meet deadlines */
+ { "none-solo", F_SOLO }, /* 1 batch per frame per client */
+ { "none-share", F_SHARE }, /* read from a common buffer */
+ { "none-rrul", F_RRUL }, /* "realtime-response under load" */
+ { "none-ping", F_PING }, /* measure inter-engine fairness */
+
+ /*
+ * throttle - original per client throttling
+ *
+ * Used for front-buffer rendering where there is no
+ * external frame marker. Each client tries to only keep
+ * 20ms of work submitted, though that measurement is
+ * flawed...
+ *
+ * This is used by Xorg to try and maintain some semblance
+ * of input/output consistency when being fed a continuous
+ * stream of X11 draw requests straight into scanout, where
+ * the clients may submit the work faster than it can be drawn.
+ */
+ { "throttle", F_THROTTLE },
+ { "throttle-vip", F_THROTTLE | F_VIP },
+ { "throttle-solo", F_THROTTLE | F_SOLO },
+ { "throttle-share", F_THROTTLE | F_SHARE },
+ { "throttle-rrul", F_THROTTLE | F_RRUL },
+
+ /*
+ * pace - mesa "submit double buffering"
+ *
+ * Submit a frame, wait for previous frame to start. This
+ * prevents each client from getting too far ahead of its
+ * rendering, maintaining a consistent input/output latency.
+ */
+ { "pace", F_PACE },
+ { "pace-solo", F_PACE | F_SOLO},
+ { "pace-share", F_PACE | F_SHARE},
+ { "pace-ping", F_PACE | F_SHARE | F_PING},
+
+ /* sync - only submit a frame at a time */
+ { "sync", F_SYNC },
+ { "sync-vip", F_SYNC | F_VIP },
+ { "sync-solo", F_SYNC | F_SOLO },
+
+ /* flow - synchronise execution against the clock (vblank) */
+ { "flow", F_PACE | F_FLOW },
+ { "flow-share", F_PACE | F_FLOW | F_SHARE },
+ { "flow-ping", F_PACE | F_FLOW | F_SHARE | F_PING },
+
+ /* next - submit ahead of the clock (vblank double buffering) */
+ { "next", F_PACE | F_FLOW | F_NEXT },
+ { "next-share", F_PACE | F_FLOW | F_NEXT | F_SHARE },
+ { "next-ping", F_PACE | F_FLOW | F_NEXT | F_SHARE | F_PING },
+
+ /* spare - underutilise by a single client timeslice */
+ { "spare", F_PACE | F_FLOW | F_SPARE },
+
+ /* half - run at half pace (submit 16ms of work every 32ms) */
+ { "half", F_PACE | F_FLOW | F_HALF },
+
+ {}
+ };
+
+ for (typeof(*fair) *f = fair; f->name; f++) {
+ igt_subtest_with_dynamic_f("fair-%s", f->name) {
+ const struct intel_execution_engine2 *e;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+ __for_each_physical_engine(i915, e) {
+ if (!gem_class_can_store_dword(i915, e->class))
+ continue;
+
+ igt_dynamic_f("%s", e->name)
+ fairness(i915, e, timeout, f->flags);
+ }
+ }
+ }
+}
+
+static uint32_t read_ctx_timestamp(int i915,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(i915, 4096),
+ .offset = 32 << 20,
+ .relocs_ptr = to_user_pointer(&reloc),
+ .relocation_count = 1,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = e->flags,
+ .rsvd1 = ctx,
+ };
+#define RUNTIME (base + 0x3a8)
+ uint32_t *map, *cs;
+ uint32_t ts;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, obj.handle,
+ 0, 4096, PROT_WRITE);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = RUNTIME;
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj.handle;
+ reloc.presumed_offset = obj.offset;
+ reloc.offset = offset_in_page(cs);
+ reloc.delta = 4000;
+ *cs++ = obj.offset + 4000;
+ *cs++ = obj.offset >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+ gem_close(i915, obj.handle);
+
+ ts = map[1000];
+ munmap(map, 4096);
+
+ return ts;
+}
+
+static void fairslice(int i915, const struct intel_execution_engine2 *e)
+{
+ igt_spin_t *spin[3];
+ uint32_t ctx[3];
+ uint32_t ts[3];
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ ctx[i] = gem_context_clone_with_engines(i915, 0);
+ spin[i] = igt_spin_new(i915, .ctx = ctx[i], .engine = e->flags);
+ }
+
+ sleep(2); /* over the course of many timeslices */
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ igt_assert(gem_bo_busy(i915, spin[i]->handle));
+ igt_spin_end(spin[i]);
+
+ ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+ igt_spin_free(i915, spin[i]);
+ gem_context_destroy(i915, ctx[i]);
+ }
+
+ qsort(ts, 3, sizeof(*ts), cmp_u32);
+ igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+ 1e-6 * ticks_to_ns(i915, ts[0]),
+ 1e-6 * ticks_to_ns(i915, ts[2]));
+
+ igt_assert(ts[0] && ts[2] > ts[0]);
+ igt_assert(4 * ts[0] > 3 * ts[2]);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2567,6 +3328,25 @@ igt_main
test_each_engine("lateslice", fd, e)
lateslice(fd, e->flags);
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(gem_scheduler_has_semaphores(fd));
+ igt_require(gem_scheduler_has_preemption(fd));
+ igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+ }
+
+ test_each_engine("fairslice", fd, e)
+ fairslice(fd, e);
+
+ igt_subtest("fairslice-all") {
+ __for_each_physical_engine(fd, e) {
+ igt_fork(child, 1)
+ fairslice(fd, e);
+ }
+ igt_waitchildren();
+ }
+ }
+
test_each_engine("submit-early-slice", fd, e)
submit_slice(fd, e, EARLY_SUBMIT);
test_each_engine("submit-golden-slice", fd, e)
@@ -2595,6 +3375,8 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_fairness(fd, 2);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-09 12:45 Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-09 12:45 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), does an equal
amount of work in each client, and asserts that they take an equal
amount of time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 699 +++++++++++++++++++++++++++++++++
1 file changed, 699 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..b3a1fedaa 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,6 +29,7 @@
#include <sys/poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/syscall.h>
#include <sched.h>
#include <signal.h>
@@ -2495,6 +2496,666 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915)
+{
+ int value = 0;
+ drm_i915_getparam_t gp = {
+ .value = &value,
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+ return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+ return (x + y - 1) / y;
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+ return div64_u64_round_up(ns * read_timestamp_frequency(i915),
+ NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+ return div64_u64_round_up(ticks * NSEC_PER_SEC,
+ read_timestamp_frequency(i915));
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ while (offset_in_page(cs) & 63)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t target_ns)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+
+ delay(i915, e, obj.handle, obj.offset, target_ns);
+
+ obj.flags |= EXEC_OBJECT_PINNED;
+ return obj;
+}
+
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { ONE, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+
+ tslog(i915, e, obj.handle, obj.offset);
+
+ obj.flags |= EXEC_OBJECT_PINNED;
+ return obj;
+}
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const uint32_t *a = A, *b = B;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static struct intel_execution_engine2
+pick_random_engine(int i915, const struct intel_execution_engine2 *not)
+{
+ const struct intel_execution_engine2 *e;
+ unsigned int count = 0;
+
+ __for_each_physical_engine(i915, e) {
+ if (e->flags == not->flags)
+ continue;
+ if (!gem_class_has_mutable_submission(i915, e->class))
+ continue;
+ count++;
+ }
+ if (!count)
+ return *not;
+
+ count = rand() % count;
+ __for_each_physical_engine(i915, e) {
+ if (e->flags == not->flags)
+ continue;
+ if (!gem_class_has_mutable_submission(i915, e->class))
+ continue;
+ if (!count--)
+ break;
+ }
+
+ return *e;
+}
+
+/*
+ * One client of the fairness test.
+ *
+ * Until *@ctl becomes non-zero, repeatedly submit a "frame" on engine @e in
+ * context @ctx: batches_per_frame executions (1 with F_SOLO, otherwise 3) of
+ * a four-object buffer list, followed by one execution of obj[0] alone, from
+ * batch offset 2048, on the "ping" engine and gated on the frame's out-fence.
+ * obj[0] comes from tslog_create(), obj[2] and obj[3] from delay_create()
+ * with @frame_ns split evenly across the batches, and obj[1] is @common if
+ * non-zero, else a private 4096-byte buffer.
+ *
+ * Flags acted on here:
+ *   F_FLOW - the first submission of each frame waits on a sw_sync fence
+ *            created on @timeline at sequence number count (count + 1 with
+ *            F_NEXT)
+ *   F_PACE - after submitting a frame, wait for the previous frame's
+ *            out-fence
+ *   F_SYNC - after submitting a frame, wait for its own out-fence
+ *   F_PING - run the obj[0] batch on an engine from pick_random_engine()
+ *
+ * After the loop, if @out is non-NULL, the first min(count, 512) dwords of
+ * obj[0] are turned into successive differences, sorted, and the median is
+ * stored to *@out after conversion with ticks_to_ns().
+ */
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeline,
+ uint32_t common,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+#define F_SPARE (1 << 5)
+#define F_NEXT (1 << 6)
+#define F_VIP (1 << 7)
+#define F_RRUL (1 << 8)
+#define F_SHARE (1 << 9)
+#define F_PING (1 << 10)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ /* obj[0] is filled in below, once the ping engine has been chosen. */
+ struct drm_i915_gem_exec_object2 obj[4] = {
+ {},
+ {
+ .handle = common ?: gem_create(i915, 4096),
+ },
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame),
+ };
+ struct intel_execution_engine2 ping = *e;
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ int n;
+
+ srandom(getpid());
+ if (flags & F_PING)
+ ping = pick_random_engine(i915, e);
+ obj[0] = tslog_create(i915, ctx, &ping);
+
+ /* One iteration per frame, until the stop flag *ctl is set. */
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(obj),
+ .buffer_count = 4,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_FLOW) {
+ unsigned int seq;
+
+ seq = count;
+ if (flags & F_NEXT)
+ seq++;
+
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, seq);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ /* First batch of the frame: the out-fence fd is taken from the upper 32 bits of rsvd2. */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+ close(execbuf.rsvd2);
+
+ /* Timestamp batch: obj[0] only, from offset 2048, after the frame's out-fence. */
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = 2048;
+ execbuf.flags = ping.flags | I915_EXEC_FENCE_IN;
+ execbuf.rsvd2 = n_fence;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ /* Alternate the two delay batches; this frame's fence becomes the previous one. */
+ igt_swap(obj[2], obj[3]);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, obj[3].handle);
+ gem_close(i915, obj[2].handle);
+ if (obj[1].handle != common)
+ gem_close(i915, obj[1].handle);
+
+ gem_sync(i915, obj[0].handle);
+ if (out) {
+ uint32_t *map;
+
+ map = gem_mmap__device_coherent(i915, obj[0].handle,
+ 0, 4096, PROT_WRITE);
+ /* Replace the logged values with successive deltas, in place. */
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ /* n - 1 deltas were produced; report their median. */
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+ }
+ gem_close(i915, obj[0].handle);
+}
+
+/* qsort() comparator ordering two unsigned long values ascending. */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Nanoseconds from @then to @now for one timeval pair (modulo 2^64). */
+static uint64_t d_timeval_ns(const struct timeval *now,
+			     const struct timeval *then)
+{
+	uint64_t ns = 0;
+
+	ns += (now->tv_sec - then->tv_sec) * NSEC_PER_SEC;
+	ns += (now->tv_usec - then->tv_usec) * 1000;
+
+	return ns;
+}
+
+/*
+ * CPU time in nanoseconds between two rusage snapshots: the user-time
+ * difference plus the system-time difference, each taken as @a minus @b.
+ */
+static uint64_t d_cpu_time(const struct rusage *a, const struct rusage *b)
+{
+	return d_timeval_ns(&a->ru_utime, &b->ru_utime) +
+	       d_timeval_ns(&a->ru_stime, &b->ru_stime);
+}
+
+/* Sleep for @delay_ns nanoseconds, then step @timeline forward by one. */
+static void timeline_advance(int timeline, int delay_ns)
+{
+	struct timespec pause = { 0 };
+
+	pause.tv_nsec = delay_ns;
+	nanosleep(&pause, NULL);
+
+	sw_sync_timeline_inc(timeline, 1);
+}
+
+/*
+ * Fork an increasing number of identical clients (1, 3, 7, ... 63) onto
+ * engine @e and check that the frame interval each one reports is close to
+ * the fence period and similar across clients.
+ *
+ * @timeout: seconds' worth of fence periods to signal on the sw_sync
+ *           timeline before asking the clients to stop
+ * @flags:   F_* modifiers (defined alongside fair_child())
+ *
+ * result[] is shared with the children: result[child] receives that
+ * child's median frame interval in ns and result[nchild] is the stop flag.
+ */
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result;
+ uint32_t common = 0;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ /* NOTE(review): 4095 is not a whole page; presumably rounded up by the kernel -- confirm. */
+ if (flags & F_SHARE)
+ common = gem_create(i915, 4095);
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+ for (int n = 2; n <= 64; n <<= 1) { /* 32 == 500us per client */
+ int timeline = sw_sync_timeline_create();
+ int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+ const int nchild = n - 1; /* odd for easy medians */
+ const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+ const int lo = nchild / 4;
+ const int hi = (3 * nchild + 3) / 4 - 1;
+ struct rusage old_usage, usage;
+ uint64_t cpu_time, d_time;
+ unsigned long vip = -1;
+ struct timespec tv;
+ struct igt_mean m;
+
+ /*
+ * F_PING: on every other physical engine, fork one extra client
+ * with 1/8 of the per-client frame time. It shares the stop flag
+ * but reports no result (out == NULL).
+ */
+ if (flags & F_PING) {
+ struct intel_execution_engine2 *ping;
+
+ __for_each_physical_engine(i915, ping) {
+ if (ping->flags == e->flags)
+ continue;
+
+ igt_fork(child, 1) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, ping,
+ child_ns / 8,
+ -1, common,
+ F_SOLO | F_PACE | F_SHARE,
+ &result[nchild],
+ NULL);
+
+ gem_context_destroy(i915, ctx);
+ }
+ }
+ }
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0]));
+ getrusage(RUSAGE_CHILDREN, &old_usage);
+ igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ /* Child 0 is the special client for the F_VIP and F_RRUL variants. */
+ if (flags & F_VIP && child == 0) {
+ gem_context_set_priority(i915, ctx, MAX_PRIO);
+ flags |= F_FLOW;
+ }
+ if (flags & F_RRUL && child == 0)
+ flags |= F_SOLO | F_FLOW | F_SYNC;
+
+ fair_child(i915, ctx, e, child_ns,
+ timeline, common, flags,
+ &result[nchild],
+ &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ while (nfences--)
+ timeline_advance(timeline, fence_ns);
+
+ /*
+ * Raise the stop flag, then keep stepping the timeline until every
+ * child has published a non-zero result (presumably so that children
+ * still waiting on a timeline fence can finish).
+ */
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child]))
+ timeline_advance(timeline, fence_ns);
+ }
+
+ igt_waitchildren();
+ close(timeline);
+
+ /*
+ * If the children consumed more than 90% of the elapsed wall time
+ * as CPU time, stop quietly for larger client counts and skip the
+ * test otherwise.
+ */
+ d_time = igt_nsec_elapsed(&tv);
+ getrusage(RUSAGE_CHILDREN, &usage);
+ cpu_time = d_cpu_time(&usage, &old_usage);
+ if (10 * cpu_time > 9 * d_time) {
+ if (nchild > 7)
+ break;
+
+ igt_skip_on_f(10 * cpu_time > 9 * d_time,
+ "%.0f%% CPU usage, presuming capacity exceeded\n",
+ 100.* cpu_time / d_time);
+ }
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ /* Capture child 0's interval before the sort reorders result[]. */
+ if (flags & (F_VIP | F_RRUL))
+ vip = result[0];
+
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+ /* The special client's interval must lie within (3/4, 4/3) of the fence period. */
+ if (vip != -1) {
+ igt_info("VIP interval %.2f ms\n", 1e-6 * vip);
+ igt_assert(4 * vip > 3 * fence_ns &&
+ 3 * vip < 4 * fence_ns);
+ }
+
+ /* May be slowed due to sheer volume of context switches */
+ igt_assert(4 * igt_mean_get(&m) > 3 * fence_ns &&
+ igt_mean_get(&m) < 3 * fence_ns);
+
+ /* The mean must lie within (3/4, 4/3) of the median. */
+ igt_assert(4 * igt_mean_get(&m) > 3 * result[nchild / 2] &&
+ 3 * igt_mean_get(&m) < 4 * result[nchild / 2]);
+
+ /* The inter-quartile range must be less than half the median. */
+ igt_assert(2 * (result[hi] - result[lo]) < result[nchild / 2]);
+ }
+
+ munmap(result, 4096);
+ if (common)
+ gem_close(i915, common);
+}
+
+/*
+ * Execute a small batch in @ctx on engine @e that stores the register at
+ * mmio base + 0x3a8 into the batch object itself at byte offset 4000, and
+ * return the dword read back from there.
+ *
+ * The store address is described by a relocation (target: the batch object,
+ * delta 4000) with a presumed offset of 32 MiB. Requires a non-zero mmio
+ * base for the engine.
+ */
+static uint32_t read_ctx_timestamp(int i915,
+ uint32_t ctx,
+ const struct intel_execution_engine2 *e)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = gem_create(i915, 4096),
+ .offset = 32 << 20,
+ .relocs_ptr = to_user_pointer(&reloc),
+ .relocation_count = 1,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .flags = e->flags,
+ .rsvd1 = ctx,
+ };
+#define RUNTIME (base + 0x3a8)
+ uint32_t *map, *cs;
+ uint32_t ts;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, obj.handle,
+ 0, 4096, PROT_WRITE);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = RUNTIME;
+ /* The relocation covers the address dword(s) written next. */
+ memset(&reloc, 0, sizeof(reloc));
+ reloc.target_handle = obj.handle;
+ reloc.presumed_offset = obj.offset;
+ reloc.offset = offset_in_page(cs);
+ reloc.delta = 4000;
+ *cs++ = obj.offset + 4000;
+ *cs++ = obj.offset >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ gem_execbuf(i915, &execbuf);
+ gem_sync(i915, obj.handle);
+ gem_close(i915, obj.handle);
+
+ /* Byte offset 4000 is dword index 1000; the mapping is still in place here. */
+ ts = map[1000];
+ munmap(map, 4096);
+
+ return ts;
+}
+
+/*
+ * Check that three spinners sharing engine @e receive comparable amounts
+ * of GPU time.
+ *
+ * Each spinner runs in its own context for two seconds. The per-context
+ * value from read_ctx_timestamp() is then sampled for each; the smallest
+ * must be non-zero, strictly less than the largest, and more than 3/4 of it.
+ *
+ * Requires scheduler semaphores, preemption and gen8+.
+ */
+static void fairslice(int i915, const struct intel_execution_engine2 *e)
+{
+	igt_spin_t *spin[3];
+	uint32_t ctx[3];
+	uint32_t ts[3];
+
+	igt_require(gem_scheduler_has_semaphores(i915));
+	igt_require(gem_scheduler_has_preemption(i915));
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		ctx[i] = gem_context_clone_with_engines(i915, 0);
+		spin[i] = igt_spin_new(i915, .ctx = ctx[i], .engine = e->flags);
+	}
+
+	sleep(2); /* over the course of many timeslices */
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_assert(gem_bo_busy(i915, spin[i]->handle));
+		igt_spin_end(spin[i]);
+
+		ts[i] = read_ctx_timestamp(i915, ctx[i], e);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
+		igt_spin_free(i915, spin[i]);
+		gem_context_destroy(i915, ctx[i]);
+	}
+
+	qsort(ts, ARRAY_SIZE(ts), sizeof(*ts), cmp_u32);
+	igt_info("%s: [%.1f, %.1f] ms\n", e->name,
+		 1e-6 * ticks_to_ns(i915, ts[0]),
+		 1e-6 * ticks_to_ns(i915, ts[2]));
+
+	igt_assert(ts[0] && ts[2] > ts[0]);
+	/* Widen before scaling so large tick counts cannot wrap in 32 bits. */
+	igt_assert(4 * (uint64_t)ts[0] > 3 * (uint64_t)ts[2]);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2561,6 +3222,9 @@ igt_main
test_each_engine("lateslice", fd, e)
lateslice(fd, e->flags);
+ test_each_engine("fairslice", fd, e)
+ fairslice(fd, e);
+
test_each_engine("submit-early-slice", fd, e)
submit_slice(fd, e, EARLY_SUBMIT);
test_each_engine("submit-golden-slice", fd, e)
@@ -2589,6 +3253,41 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fair-none", fd, e)
+ fairness(fd, e, 2, 0);
+ test_each_engine_store("fair-none-vip", fd, e)
+ fairness(fd, e, 2, F_VIP);
+ test_each_engine_store("fair-none-share", fd, e)
+ fairness(fd, e, 2, F_SHARE);
+ test_each_engine_store("fair-none-rrul", fd, e)
+ fairness(fd, e, 2, F_RRUL);
+ test_each_engine_store("fair-none-ping", fd, e)
+ fairness(fd, e, 2, F_PING);
+ test_each_engine_store("fair-pace", fd, e)
+ fairness(fd, e, 2, F_PACE);
+ test_each_engine_store("fair-pace-share", fd, e)
+ fairness(fd, e, 2, F_PACE | F_SHARE);
+ test_each_engine_store("fair-pace-ping", fd, e)
+ fairness(fd, e, 2, F_PACE | F_SHARE | F_PING);
+ test_each_engine_store("fair-sync", fd, e)
+ fairness(fd, e, 2, F_SYNC);
+ test_each_engine_store("fair-sync-vip", fd, e)
+ fairness(fd, e, 2, F_SYNC | F_VIP);
+ test_each_engine_store("fair-solo", fd, e)
+ fairness(fd, e, 2, F_SYNC | F_SOLO);
+ test_each_engine_store("fair-flow", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW);
+ test_each_engine_store("fair-flow-ping", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_PING);
+ test_each_engine_store("fair-next", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_NEXT);
+ test_each_engine_store("fair-next-share", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_NEXT | F_SHARE);
+ test_each_engine_store("fair-spare", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_SPARE);
+ test_each_engine_store("fair-half", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_HALF);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
2020-06-02 8:22 Chris Wilson
2020-06-02 8:32 ` Chris Wilson
@ 2020-06-02 8:50 ` Chris Wilson
1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-02 8:50 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), does an equal
amount of work in each client, and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 442 +++++++++++++++++++++++++++++++++
1 file changed, 442 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..ced9ee571 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,433 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+/*
+ * Query I915_PARAM_CS_TIMESTAMP_FREQUENCY from the device @i915.
+ * The ioctl's return value is not checked; 0 is returned if it leaves the
+ * value untouched.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int freq = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &freq,
+	};
+
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return freq;
+}
+
+/* Divide @x by @y, rounding any remainder up; @y must be non-zero. */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	const uint64_t biased = x + y - 1;
+
+	return biased / y;
+}
+
+/*
+ * Convert @ns nanoseconds into timestamp ticks, rounding up, using the
+ * frequency reported by read_timestamp_frequency().
+ */
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	const uint64_t scaled = ns * read_timestamp_frequency(i915);
+
+	return div64_u64_round_up(scaled, NSEC_PER_SEC);
+}
+
+/*
+ * Convert @ticks timestamp ticks into nanoseconds, rounding up, using the
+ * frequency reported by read_timestamp_frequency().
+ */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	const uint64_t scaled = ticks * NSEC_PER_SEC;
+
+	return div64_u64_round_up(scaled, read_timestamp_frequency(i915));
+}
+
+/* Command-streamer instruction header: opcode in bits 23 and up, flags below. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH header for @x ALU instruction dwords (encoded as x - 1). */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+/* One ALU instruction dword: opcode at bit 20, operand 1 at bit 10, operand 2 at bit 0. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/* LOAD0/LOAD1 take a single operand; the second operand field is left as 0. */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/*
+ * Write into @handle a batch that loops on the GPU until roughly @ns have
+ * elapsed, as measured by the register at mmio base + 0x3a8 of engine @e.
+ *
+ * @addr is used as the batch's own GPU address: the batch stores to,
+ * compares against, and jumps within itself.
+ *
+ * Emitted commands, in order:
+ *   1. load the TIMESTAMP register into GPR START_TS (upper dword zeroed);
+ *   2. loop head (padded to an 8-byte boundary): MI_ARB_CHECK, then load
+ *      TIMESTAMP into GPR NOW_TS (upper dword zeroed);
+ *   3. MI_MATH: NOW_TS = ~(NOW_TS - START_TS);
+ *   4. store GPR NOW_TS to @addr + 4000;
+ *   5. MI_COND_BATCH_BUFFER_END comparing that location with ~ticks --
+ *      presumably ends the batch once the elapsed ticks reach the target;
+ *      confirm the comparison direction against the command reference;
+ *   6. MI_BATCH_BUFFER_START back to the loop head.
+ *
+ * Requires a non-zero mmio base for the engine.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* START_TS = TIMESTAMP, with the upper dword cleared. */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with a zero dword so the loop head sits on an 8-byte boundary. */
+ if (offset_in_page(cs) & 4)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* NOW_TS = TIMESTAMP, with the upper dword cleared. */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* NOW_TS = ~(NOW_TS - START_TS) */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Compare the stored value at addr + 4000 against the inverted target. */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Not finished: jump back to the loop head. */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Build a delay batch of about @target_ns for @ctx on engine @e and return
+ * its exec object by value.
+ *
+ * The object from batch_create() is executed once and waited upon, then
+ * delay() fills it in using the offset recorded in the exec object
+ * (presumably the address the kernel bound it at -- confirm against
+ * gem_execbuf). EXEC_OBJECT_PINNED is set on the returned object.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 submit = {
+		.flags = e->flags,
+		.rsvd1 = ctx,
+		.buffer_count = 1,
+		.buffers_ptr = to_user_pointer(&batch),
+	};
+
+	/* Run the placeholder batch to completion before rewriting it. */
+	gem_execbuf(i915, &submit);
+	gem_sync(i915, batch.handle);
+
+	delay(i915, e, batch.handle, batch.offset, target_ns);
+
+	batch.flags = batch.flags | EXEC_OBJECT_PINNED;
+	return batch;
+}
+
+/*
+ * Write into the second half of @handle (dword 512 onwards, i.e. byte
+ * offset 2048) a batch that logs the register at mmio base + 0x358 of
+ * engine @e into the object itself and then advances its own write pointer.
+ *
+ * @addr is used as the object's GPU address.
+ *
+ * Emitted commands, in order:
+ *   1. SRM: store CS_TIMESTAMP to the address held in the batch at
+ *      timestamp_lo (initially @addr);
+ *   2. load GPR ADDR with the value held in the batch at addr_lo (initially
+ *      @addr), GPR ONE with 4 and GPR MASK with 0xfffffffffffff7ff;
+ *   3. MI_MATH: ADDR = (ADDR + 4) & MASK. Clearing bit 11 presumably keeps
+ *      the pointer inside the first 2048 bytes of the object, i.e. a ring
+ *      of 512 dwords -- this relies on @addr's alignment; confirm with the
+ *      caller;
+ *   4. SRM: write GPR ADDR back over both timestamp_lo and addr_lo, so the
+ *      next execution of the batch stores to the following slot.
+ *
+ * Requires a non-zero mmio base for the engine.
+ */
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { ONE, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Store the timestamp; timestamp_lo marks the address dword patched later. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* ADDR = current write pointer; addr_lo marks the immediate patched later. */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ /* ONE = 4: the pointer advances by one dword per execution. */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE) + 4;
+ *cs++ = 0;
+
+ /* MASK = all ones except bit 11. */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* ADDR = (ADDR + ONE) & MASK */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Patch the updated pointer into both places the batch reads it from. */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Build the timestamp-log batch for @ctx on engine @e and return its exec
+ * object by value.
+ *
+ * The object from batch_create() is executed once and waited upon, then
+ * tslog() fills it in using the offset recorded in the exec object
+ * (presumably the address the kernel bound it at -- confirm against
+ * gem_execbuf). EXEC_OBJECT_PINNED is set on the returned object.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 submit = {
+		.flags = e->flags,
+		.rsvd1 = ctx,
+		.buffer_count = 1,
+		.buffers_ptr = to_user_pointer(&batch),
+	};
+
+	/* Run the placeholder batch to completion before rewriting it. */
+	gem_execbuf(i915, &submit);
+	gem_sync(i915, batch.handle);
+
+	tslog(i915, e, batch.handle, batch.offset);
+
+	batch.flags = batch.flags | EXEC_OBJECT_PINNED;
+	return batch;
+}
+
+/*
+ * qsort() comparator ordering two uint32_t values ascending.
+ *
+ * The elements must be read as uint32_t: reading them through an
+ * unsigned long pointer would pull in the neighbouring element wherever
+ * long is wider than 32 bits.
+ */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * One client of the fairness test.
+ *
+ * Until *@ctl becomes non-zero, repeatedly submit a "frame" on engine @e in
+ * context @ctx: batches_per_frame executions (1 with F_SOLO, otherwise 3) of
+ * a delay batch, followed by one execution of the tslog batch from offset
+ * 2048. The two delay batches come from delay_create() with @frame_ns split
+ * evenly across the batches, and alternate from frame to frame.
+ *
+ * Flags acted on here:
+ *   F_FLOW - the first submission of each frame waits on a sw_sync fence
+ *            created on @timeline at sequence number count
+ *   F_PACE - after submitting a frame, wait for the previous frame's
+ *            out-fence
+ *   F_SYNC - after submitting a frame, wait for its own out-fence
+ *
+ * After the loop, the first min(count, 512) dwords of the tslog object are
+ * turned into successive differences, sorted, and the median is stored to
+ * *@out after conversion with ticks_to_ns().
+ *
+ * @timeout is not used in this function.
+ */
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeout,
+ int timeline,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+#define F_SPARE (1 << 8)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 prev =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ uint32_t *map;
+ int n;
+
+ /* One iteration per frame, until the stop flag *ctl is set. */
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_FLOW) {
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, count);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ /* First batch of the frame: the out-fence fd is taken from the upper 32 bits of rsvd2. */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+
+ /* Timestamp batch: the tslog object, executed from offset 2048. */
+ execbuf.buffers_ptr = to_user_pointer(&ts);
+ execbuf.batch_start_offset = 2048;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+ close(execbuf.rsvd2);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ /* Alternate the two delay batches; this frame's fence becomes the previous one. */
+ igt_swap(prev, next);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+
+ gem_sync(i915, ts.handle);
+ map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+ /* Replace the logged values with successive deltas, in place. */
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ /* n - 1 deltas were produced; report their median. */
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+
+ gem_close(i915, ts.handle);
+}
+
+/* qsort() comparator ordering two unsigned long values ascending. */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Fork 1, 3, 7 and then 15 identical clients onto engine @e and report the
+ * spread of the median frame interval each of them measures.
+ *
+ * @timeout: seconds' worth of fence periods to signal on the sw_sync
+ *           timeline before asking the clients to stop
+ * @flags:   F_* modifiers (defined alongside fair_child())
+ *
+ * result[] is shared with the children: result[child] receives that
+ * child's median frame interval in ns and result[nchild] is the stop flag.
+ *
+ * The statistical checks at the end are compiled out (#if 0); only the
+ * igt_info() summary is produced.
+ */
+static void fairness(int i915,
+		     const struct intel_execution_engine2 *e,
+		     int timeout, unsigned int flags)
+{
+	const int frame_ns = 16666 * 1000;
+	const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+	unsigned long *result;
+
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_has_mutable_submission(i915, e->class));
+	igt_require(e->class == I915_ENGINE_CLASS_RENDER || /* XXX excuse me? */
+		    intel_gen(intel_get_drm_devid(i915)) < 11);
+
+	result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+	for (int n = 2; n <= 16; n <<= 1) {
+		int timeline = sw_sync_timeline_create();
+		int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+		const int nchild = n - 1; /* odd for easy medians */
+		const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+		const int lo = nchild / 4;
+		const int hi = (3 * nchild + 3) / 4 - 1;
+		const struct timespec period = { .tv_nsec = fence_ns };
+		struct igt_mean m;
+
+		memset(result, 0, (nchild + 1) * sizeof(result[0]));
+		igt_fork(child, nchild) {
+			uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+			fair_child(i915, ctx, e, child_ns,
+				   timeout, timeline, flags,
+				   &result[nchild],
+				   &result[child]);
+
+			gem_context_destroy(i915, ctx);
+		}
+
+		/* Signal one fence per period for the requested duration. */
+		while (nfences--) {
+			nanosleep(&period, NULL);
+			sw_sync_timeline_inc(timeline, 1);
+		}
+
+		/*
+		 * Raise the stop flag, then keep stepping the timeline until
+		 * every child has published a non-zero result (presumably so
+		 * that children still waiting on a fence can finish).
+		 */
+		result[nchild] = 1;
+		for (int child = 0; child < nchild; child++) {
+			while (!READ_ONCE(result[child])) {
+				nanosleep(&period, NULL);
+				sw_sync_timeline_inc(timeline, 1);
+			}
+		}
+		igt_waitchildren();
+		close(timeline);
+
+		igt_mean_init(&m);
+		for (int child = 0; child < nchild; child++)
+			igt_mean_add(&m, result[child]);
+
+		qsort(result, nchild, sizeof(*result), cmp_ul);
+		igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+			 nchild,
+			 1e-6 * result[0], 1e-6 * result[nchild - 1],
+			 1e-6 * result[lo], 1e-6 * result[hi],
+			 1e-6 * result[nchild / 2],
+			 1e-6 * igt_mean_get(&m),
+			 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+		/* Mean within roughly 10% of target: 0.9 * frame < mean < 1.11 * frame */
+		igt_assert(10 * igt_mean_get(&m) > 9 * frame_ns &&
+			   9 * igt_mean_get(&m) < 10 * frame_ns);
+
+		/* Variance [inter-quartile range] is less than 33% of median */
+		igt_assert(3 * (result[hi] - result[lo]) < result[nchild / 2]);
+#endif
+	}
+
+	munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +3016,21 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fair-none", fd, e)
+ fairness(fd, e, 2, 0);
+ test_each_engine_store("fair-pace", fd, e)
+ fairness(fd, e, 2, F_PACE);
+ test_each_engine_store("fair-sync", fd, e)
+ fairness(fd, e, 2, F_SYNC);
+ test_each_engine_store("fair-solo", fd, e)
+ fairness(fd, e, 2, F_SYNC | F_SOLO);
+ test_each_engine_store("fair-flow", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW);
+ test_each_engine_store("fair-spare", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_SPARE);
+ test_each_engine_store("fair-half", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_HALF);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
2020-06-02 8:22 Chris Wilson
@ 2020-06-02 8:32 ` Chris Wilson
2020-06-02 8:50 ` Chris Wilson
1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-02 8:32 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), does an equal
amount of work in each client, and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 440 +++++++++++++++++++++++++++++++++
1 file changed, 440 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..911379cad 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,431 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+/*
+ * Ask the device for I915_PARAM_CS_TIMESTAMP_FREQUENCY via
+ * DRM_IOCTL_I915_GETPARAM and return the reported value.
+ *
+ * The ioctl's return code is not inspected; the result variable starts at 0,
+ * so 0 is what callers see if the ioctl does not write to it.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int freq = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &freq,
+	};
+
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return freq;
+}
+
+/*
+ * Divide x by y and round the quotient up to the next integer.
+ *
+ * The result is built from the truncated quotient plus one if there is a
+ * remainder, rather than from (x + y - 1) / y, so that it does not wrap when
+ * x is close to UINT64_MAX. y must be non-zero.
+ */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return x / y + (x % y != 0);
+}
+
+/*
+ * Convert a duration in nanoseconds into timestamp ticks, rounding up, using
+ * the frequency returned by read_timestamp_frequency().
+ */
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	const uint64_t scaled = ns * read_timestamp_frequency(i915);
+
+	return div64_u64_round_up(scaled, NSEC_PER_SEC);
+}
+
+/*
+ * Convert a tick count into nanoseconds, rounding up, using the frequency
+ * returned by read_timestamp_frequency() as the divisor.
+ */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	const uint64_t scaled = ticks * NSEC_PER_SEC;
+
+	return div64_u64_round_up(scaled, read_timestamp_frequency(i915));
+}
+
+/* Build a command header dword: opcode in bits 23 and up, flags OR'ed in below. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH header for a packet carrying x ALU instruction dwords. */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+/* One ALU instruction dword: opcode at bit 20, first operand at bit 10, second operand in the low bits. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand; MI_MATH_INSTR needs three arguments, so
+ * the unused second operand field is passed explicitly as 0.
+ */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/*
+ * Write a self-looping batch into the first page of @handle that spins on the
+ * engine until roughly @ns nanoseconds worth of timestamp ticks have elapsed.
+ *
+ * @addr is the GPU address of @handle; it is used for the loop's jump target
+ * and for a scratch dword at byte offset 4000 inside the same page.
+ * Skips the test (igt_require) if no mmio base is known for the engine.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* GPR[START_TS]: upper dword = 0, lower dword = copy of TIMESTAMP */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with a zero dword so the loop target sits on an 8-byte boundary */
+ if (offset_in_page(cs) & 4)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* GPR[NOW_TS]: upper dword = 0, lower dword = copy of TIMESTAMP */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* GPR[NOW_TS] = inverted (NOW_TS - START_TS), via STOREINV of the accumulator */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Spill the inverted elapsed count to the scratch dword at addr + 4000 */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /*
+ * Conditional end comparing the scratch dword against the inverted
+ * target tick count (truncated to 32 bits by the store).
+ * NOTE(review): presumably this terminates the batch once the elapsed
+ * ticks reach the target - confirm against the
+ * MI_COND_BATCH_BUFFER_END compare semantics.
+ */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise jump back to the loop head recorded in jmp */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object, submit it once on @ctx / @e and wait for it, then
+ * fill it with the delay() loop targeting @target_ns using the offset found in
+ * the exec object after that submission. The returned object has
+ * EXEC_OBJECT_PINNED set in addition to EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.flags = e->flags,
+		.rsvd1 = ctx,
+		.buffer_count = 1,
+		.buffers_ptr = to_user_pointer(&batch),
+	};
+
+	/* NOTE(review): presumably this first run is what populates batch.offset - confirm */
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, batch.handle);
+
+	delay(i915, e, batch.handle, batch.offset, target_ns);
+
+	batch.flags |= EXEC_OBJECT_PINNED;
+	return batch;
+}
+
+/*
+ * Write a timestamp-logging batch into the second half of @handle's first
+ * page (starting at dword 512, i.e. byte offset 2048).
+ *
+ * Each execution stores CS_TIMESTAMP to a destination address embedded in the
+ * batch, then computes (destination + 4) & mask in a GPR and stores the result
+ * back over the two places in the batch that hold the destination's low
+ * dword, so the next execution logs to the following slot. @addr is the GPU
+ * address of @handle and the initial destination.
+ * Skips the test (igt_require) if no mmio base is known for the engine.
+ */
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { ONE, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Store CS_TIMESTAMP to the current slot; timestamp_lo marks the patched dword */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* GPR[ADDR] = current slot address; addr_lo marks the patched immediate */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ /* GPR[ONE] = 4: despite the name, this is the per-run address increment in bytes */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE) + 4;
+ *cs++ = 0;
+
+ /* GPR[MASK] clears bit 11 (0x800), keeping the slot offset below 2048 bytes */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* GPR[ADDR] = (GPR[ADDR] + GPR[ONE]) & GPR[MASK] */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Patch the advanced address back into both dwords recorded above */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object, submit it once on @ctx / @e and wait for it, then
+ * fill it with the tslog() timestamp logger using the offset found in the
+ * exec object after that submission. The returned object has
+ * EXEC_OBJECT_PINNED set in addition to EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 logger = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.flags = e->flags,
+		.rsvd1 = ctx,
+		.buffer_count = 1,
+		.buffers_ptr = to_user_pointer(&logger),
+	};
+
+	/* NOTE(review): presumably this first run is what populates logger.offset - confirm */
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, logger.handle);
+
+	tslog(i915, e, logger.handle, logger.offset);
+
+	logger.flags |= EXEC_OBJECT_PINNED;
+	return logger;
+}
+
+/*
+ * qsort() comparator ordering two uint32_t elements in ascending order.
+ *
+ * The elements are read as uint32_t to match the array of 32-bit values it
+ * is used to sort; reading them through a wider type would compare bytes
+ * belonging to the neighbouring element and run past the end of the array.
+ *
+ * Returns -1, 0 or 1 when *A is less than, equal to or greater than *B.
+ */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Body of one forked client: repeatedly submit a "frame" of delay batches on
+ * @ctx / @e followed by a timestamp-logging batch, until *@ctl becomes
+ * non-zero, then publish the median interval between logged timestamps (in
+ * nanoseconds) through *@out.
+ *
+ * @frame_ns is the total delay per frame; it is split evenly across the
+ * batches making up a frame (1 with F_SOLO, otherwise 3).
+ * @timeline is a sw_sync timeline used to gate each frame when F_FLOW is set.
+ * @timeout is not referenced in this function.
+ *
+ * Flags handled here:
+ *  F_FLOW - frame N waits on a fence at point N of @timeline
+ *  F_PACE - wait for the previous frame's out-fence before looping
+ *  F_SYNC - wait for this frame's out-fence before looping
+ *  F_SOLO - one batch per frame instead of three
+ */
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeout,
+ int timeline,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+#define F_SPARE (1 << 8)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 prev =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ uint32_t *map;
+ int n;
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ /* Gate this frame on point 'count' of the parent's timeline */
+ if (flags & F_FLOW) {
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, count);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ /* First batch of the frame returns an out-fence in the upper half of rsvd2 */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+
+ /* Finish the frame with the timestamp logger, which lives at byte 2048 of ts */
+ execbuf.buffers_ptr = to_user_pointer(&ts);
+ execbuf.batch_start_offset = 2048;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+ /*
+ * close() takes an int, so only the low 32 bits of rsvd2 are used.
+ * NOTE(review): presumably those still hold the in-fence fd (or the
+ * initial -1 without F_FLOW) after the execbuf - confirm.
+ */
+ close(execbuf.rsvd2);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ /* Alternate between the two delay batches and remember this frame's fence */
+ igt_swap(prev, next);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+
+ gem_sync(i915, ts.handle);
+ map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+ /* Turn up to 512 logged timestamps into successive differences, in place */
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ /* Sort the differences and report the middle one, converted to ns */
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+
+ gem_close(i915, ts.handle);
+}
+
+/*
+ * qsort() comparator for unsigned long elements, ascending.
+ * Yields -1, 0 or 1 for less-than, equal and greater-than respectively.
+ */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Fork 1, 3, 7 and then 15 clients on engine @e, let each run fair_child()
+ * for about @timeout seconds, and print statistics over the per-client median
+ * frame interval they report.
+ *
+ * The parent drives a sw_sync timeline, advancing it once per fence_ns
+ * (one frame of 16666 * 1000 ns, or two frames with F_HALF). With F_SPARE the
+ * per-client delay is computed as if there were one extra client.
+ * Requires gen >= 8 and mutable submission for the engine class.
+ */
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ /* Shared with the forked children: result[child] per client, result[nchild] is the stop flag */
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+ for (int n = 2; n <= 16; n <<= 1) {
+ int timeline = sw_sync_timeline_create();
+ int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+ const int nchild = n - 1; /* odd for easy medians */
+ const int child_ns = frame_ns / (nchild + !!(flags & F_SPARE));
+ const int lo = nchild / 4;
+ const int hi = (3 * nchild + 3) / 4 - 1;
+ struct igt_mean m;
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0]));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, e, child_ns,
+ timeout, timeline, flags,
+ &result[nchild],
+ &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ /* Advance the timeline once per fence_ns for the duration of the run */
+ while (nfences--) {
+ struct timespec tv = { .tv_nsec = fence_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+ }
+ /* Tell the children to stop, then keep advancing until each has written a result */
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child])) {
+ struct timespec tv = { .tv_nsec = fence_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+ }
+ }
+ igt_waitchildren();
+ close(timeline);
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ /* Sort so that range, quartiles (lo/hi) and median can be indexed directly */
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+ /*
+ * NOTE(review): the disabled checks below cannot pass as written. The
+ * first requires mean > 10/9 * frame_ns and mean < 9/10 * frame_ns at
+ * the same time; the second multiplies only result[hi] by 3 instead of
+ * the (hi - lo) difference. Fix both before enabling.
+ */
+#if 0
+ /* Mean within 10% of target */
+ igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
+ 10 * igt_mean_get(&m) < 9 * frame_ns);
+
+ /* Variance [inter-quartile range] is less than 33% of median */
+ igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
+#endif
+ }
+
+ munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +3014,21 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fair-none", fd, e)
+ fairness(fd, e, 2, 0);
+ test_each_engine_store("fair-pace", fd, e)
+ fairness(fd, e, 2, F_PACE);
+ test_each_engine_store("fair-sync", fd, e)
+ fairness(fd, e, 2, F_SYNC);
+ test_each_engine_store("fair-solo", fd, e)
+ fairness(fd, e, 2, F_SYNC | F_SOLO);
+ test_each_engine_store("fair-flow", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW);
+ test_each_engine_store("fair-spare", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_SPARE);
+ test_each_engine_store("fair-half", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW | F_HALF);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-02 8:22 Chris Wilson
2020-06-02 8:32 ` Chris Wilson
2020-06-02 8:50 ` Chris Wilson
0 siblings, 2 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-02 8:22 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 436 +++++++++++++++++++++++++++++++++
1 file changed, 436 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..3045eeb62 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,429 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+/*
+ * Ask the device for I915_PARAM_CS_TIMESTAMP_FREQUENCY via
+ * DRM_IOCTL_I915_GETPARAM and return the reported value.
+ *
+ * The ioctl's return code is not inspected; the result variable starts at 0,
+ * so 0 is what callers see if the ioctl does not write to it.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int freq = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &freq,
+	};
+
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return freq;
+}
+
+/*
+ * Divide x by y and round the quotient up to the next integer.
+ *
+ * The result is built from the truncated quotient plus one if there is a
+ * remainder, rather than from (x + y - 1) / y, so that it does not wrap when
+ * x is close to UINT64_MAX. y must be non-zero.
+ */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return x / y + (x % y != 0);
+}
+
+/*
+ * Convert a duration in nanoseconds into timestamp ticks, rounding up, using
+ * the frequency returned by read_timestamp_frequency().
+ */
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	const uint64_t scaled = ns * read_timestamp_frequency(i915);
+
+	return div64_u64_round_up(scaled, NSEC_PER_SEC);
+}
+
+/*
+ * Convert a tick count into nanoseconds, rounding up, using the frequency
+ * returned by read_timestamp_frequency() as the divisor.
+ */
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{
+	const uint64_t scaled = ticks * NSEC_PER_SEC;
+
+	return div64_u64_round_up(scaled, read_timestamp_frequency(i915));
+}
+
+/* Build a command header dword: opcode in bits 23 and up, flags OR'ed in below. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH header for a packet carrying x ALU instruction dwords. */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+/* One ALU instruction dword: opcode at bit 20, first operand at bit 10, second operand in the low bits. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand; MI_MATH_INSTR needs three arguments, so
+ * the unused second operand field is passed explicitly as 0.
+ */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/*
+ * Write a self-looping batch into the first page of @handle that spins on the
+ * engine until roughly @ns nanoseconds worth of timestamp ticks have elapsed.
+ *
+ * @addr is the GPU address of @handle; it is used for the loop's jump target
+ * and for a scratch dword at byte offset 4000 inside the same page.
+ * Skips the test (igt_require) if no mmio base is known for the engine.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* GPR[START_TS]: upper dword = 0, lower dword = copy of TIMESTAMP */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with a zero dword so the loop target sits on an 8-byte boundary */
+ if (offset_in_page(cs) & 4)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* GPR[NOW_TS]: upper dword = 0, lower dword = copy of TIMESTAMP */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* GPR[NOW_TS] = inverted (NOW_TS - START_TS), via STOREINV of the accumulator */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Spill the inverted elapsed count to the scratch dword at addr + 4000 */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /*
+ * Conditional end comparing the scratch dword against the inverted
+ * target tick count (truncated to 32 bits by the store).
+ * NOTE(review): presumably this terminates the batch once the elapsed
+ * ticks reach the target - confirm against the
+ * MI_COND_BATCH_BUFFER_END compare semantics.
+ */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise jump back to the loop head recorded in jmp */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object, submit it once on @ctx / @e and wait for it, then
+ * fill it with the delay() loop targeting @target_ns using the offset found in
+ * the exec object after that submission. The returned object has
+ * EXEC_OBJECT_PINNED set in addition to EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.flags = e->flags,
+		.rsvd1 = ctx,
+		.buffer_count = 1,
+		.buffers_ptr = to_user_pointer(&batch),
+	};
+
+	/* NOTE(review): presumably this first run is what populates batch.offset - confirm */
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, batch.handle);
+
+	delay(i915, e, batch.handle, batch.offset, target_ns);
+
+	batch.flags |= EXEC_OBJECT_PINNED;
+	return batch;
+}
+
+/*
+ * Write a timestamp-logging batch into the second half of @handle's first
+ * page (starting at dword 512, i.e. byte offset 2048).
+ *
+ * Each execution stores CS_TIMESTAMP to a destination address embedded in the
+ * batch, then computes (destination + 4) & mask in a GPR and stores the result
+ * back over the two places in the batch that hold the destination's low
+ * dword, so the next execution logs to the following slot. @addr is the GPU
+ * address of @handle and the initial destination.
+ * Skips the test (igt_require) if no mmio base is known for the engine.
+ */
+static void tslog(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { ONE, MASK, ADDR };
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base);
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512;
+
+ /* Store CS_TIMESTAMP to the current slot; timestamp_lo marks the patched dword */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs;
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ /* GPR[ADDR] = current slot address; addr_lo marks the patched immediate */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs;
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ /* GPR[ONE] = 4: despite the name, this is the per-run address increment in bytes */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE) + 4;
+ *cs++ = 0;
+
+ /* GPR[MASK] clears bit 11 (0x800), keeping the slot offset below 2048 bytes */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ /* GPR[ADDR] = (GPR[ADDR] + GPR[ONE]) & GPR[MASK] */
+ *cs++ = MI_MATH(8);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ /* Patch the advanced address back into both dwords recorded above */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo);
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo);
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create a batch object, submit it once on @ctx / @e and wait for it, then
+ * fill it with the tslog() timestamp logger using the offset found in the
+ * exec object after that submission. The returned object has
+ * EXEC_OBJECT_PINNED set in addition to EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ */
+static struct drm_i915_gem_exec_object2
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_gem_exec_object2 logger = {
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+		.handle = batch_create(i915),
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.flags = e->flags,
+		.rsvd1 = ctx,
+		.buffer_count = 1,
+		.buffers_ptr = to_user_pointer(&logger),
+	};
+
+	/* NOTE(review): presumably this first run is what populates logger.offset - confirm */
+	gem_execbuf(i915, &eb);
+	gem_sync(i915, logger.handle);
+
+	tslog(i915, e, logger.handle, logger.offset);
+
+	logger.flags |= EXEC_OBJECT_PINNED;
+	return logger;
+}
+
+/*
+ * qsort() comparator ordering two uint32_t elements in ascending order.
+ *
+ * The elements are read as uint32_t to match the array of 32-bit values it
+ * is used to sort; reading them through a wider type would compare bytes
+ * belonging to the neighbouring element and run past the end of the array.
+ *
+ * Returns -1, 0 or 1 when *A is less than, equal to or greater than *B.
+ */
+static int cmp_u32(const void *A, const void *B)
+{
+	const uint32_t *a = A, *b = B;
+
+	if (*a < *b)
+		return -1;
+	else if (*a > *b)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Body of one forked client: repeatedly submit a "frame" of delay batches on
+ * @ctx / @e followed by a timestamp-logging batch, until *@ctl becomes
+ * non-zero, then publish the median interval between logged timestamps (in
+ * nanoseconds) through *@out.
+ *
+ * @frame_ns is the total delay per frame; it is split evenly across the
+ * batches making up a frame (1 with F_SOLO, otherwise 3).
+ * @timeline is a sw_sync timeline used to gate each frame when F_FLOW or
+ * F_HALF is set.
+ * @timeout is not referenced in this function.
+ *
+ * Flags handled here:
+ *  F_FLOW, F_HALF - frame N waits on a fence at point N of @timeline
+ *  F_PACE - wait for the previous frame's out-fence before looping
+ *  F_SYNC - wait for this frame's out-fence before looping
+ *  F_SOLO - one batch per frame instead of three
+ */
+static void fair_child(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns,
+ int timeout,
+ int timeline,
+ unsigned int flags,
+ unsigned long *ctl,
+ unsigned long *out)
+#define F_SYNC (1 << 0)
+#define F_PACE (1 << 1)
+#define F_FLOW (1 << 2)
+#define F_HALF (1 << 3)
+#define F_SOLO (1 << 4)
+{
+ const int batches_per_frame = flags & F_SOLO ? 1 : 3;
+ struct drm_i915_gem_exec_object2 prev =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+ int p_fence = -1, n_fence = -1;
+ unsigned long count = 0;
+ uint32_t *map;
+ int n;
+
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ /* Gate this frame on point 'count' of the parent's timeline */
+ if (flags & (F_FLOW | F_HALF)) {
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, count);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ /* First batch of the frame returns an out-fence in the upper half of rsvd2 */
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32;
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf);
+
+ /* Finish the frame with the timestamp logger, which lives at byte 2048 of ts */
+ execbuf.buffers_ptr = to_user_pointer(&ts);
+ execbuf.batch_start_offset = 2048;
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACE && p_fence != -1) {
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+ /*
+ * close() takes an int, so only the low 32 bits of rsvd2 are used.
+ * NOTE(review): presumably those still hold the in-fence fd (or the
+ * initial -1 when no in-fence was attached) after the execbuf - confirm.
+ */
+ close(execbuf.rsvd2);
+
+ if (flags & F_SYNC) {
+ struct pollfd pfd = {
+ .fd = n_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+
+ /* Alternate between the two delay batches and remember this frame's fence */
+ igt_swap(prev, next);
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+
+ gem_sync(i915, ts.handle);
+ map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+ /* Turn up to 512 logged timestamps into successive differences, in place */
+ for (n = 1; n < min(count, 512); n++) {
+ igt_assert(map[n]);
+ map[n - 1] = map[n] - map[n - 1];
+ }
+ /* Sort the differences and report the middle one, converted to ns */
+ qsort(map, --n, sizeof(*map), cmp_u32);
+ *out = ticks_to_ns(i915, map[n / 2]);
+ munmap(map, 4096);
+
+ gem_close(i915, ts.handle);
+}
+
+/*
+ * qsort() comparator for unsigned long elements, ascending.
+ * Yields -1, 0 or 1 for less-than, equal and greater-than respectively.
+ */
+static int cmp_ul(const void *A, const void *B)
+{
+	const unsigned long lhs = *(const unsigned long *)A;
+	const unsigned long rhs = *(const unsigned long *)B;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Fork 1, 3, 7 and then 15 clients on engine @e, let each run fair_child()
+ * for about @timeout seconds, and print statistics over the per-client median
+ * frame interval they report.
+ *
+ * The parent drives a sw_sync timeline, advancing it once per fence_ns
+ * (one frame of 16666 * 1000 ns, or two frames with F_HALF). Each client is
+ * given a per-frame delay of frame_ns / nchild.
+ * Requires gen >= 8 and mutable submission for the engine class.
+ */
+static void fairness(int i915,
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags)
+{
+ const int frame_ns = 16666 * 1000;
+ const int fence_ns = flags & F_HALF ? 2 * frame_ns : frame_ns;
+ unsigned long *result;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ /* Shared with the forked children: result[child] per client, result[nchild] is the stop flag */
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+
+ for (int n = 2; n <= 16; n <<= 1) {
+ int timeline = sw_sync_timeline_create();
+ int nfences = timeout * NSEC_PER_SEC / fence_ns + 1;
+ const int nchild = n - 1; /* odd for easy medians */
+ const int lo = nchild / 4;
+ const int hi = (3 * nchild + 3) / 4 - 1;
+ struct igt_mean m;
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0]));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, e, frame_ns / nchild,
+ timeout, timeline, flags,
+ &result[nchild],
+ &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ /* Advance the timeline once per fence_ns for the duration of the run */
+ while (nfences--) {
+ struct timespec tv = { .tv_nsec = fence_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+ }
+ /* Tell the children to stop, then keep advancing until each has written a result */
+ result[nchild] = 1;
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child])) {
+ struct timespec tv = { .tv_nsec = fence_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+ }
+ }
+ igt_waitchildren();
+ close(timeline);
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ /* Sort so that range, quartiles (lo/hi) and median can be indexed directly */
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+ /*
+ * NOTE(review): the disabled checks below cannot pass as written. The
+ * first requires mean > 10/9 * frame_ns and mean < 9/10 * frame_ns at
+ * the same time; the second multiplies only result[hi] by 3 instead of
+ * the (hi - lo) difference. Fix both before enabling.
+ */
+#if 0
+ /* Mean within 10% of target */
+ igt_assert( 9 * igt_mean_get(&m) > 10 * frame_ns &&
+ 10 * igt_mean_get(&m) < 9 * frame_ns);
+
+ /* Variance [inter-quartile range] is less than 33% of median */
+ igt_assert(3 * result[hi] - result[lo] < result[nchild / 2]);
+#endif
+ }
+
+ munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +3012,19 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fair-none", fd, e)
+ fairness(fd, e, 2, 0);
+ test_each_engine_store("fair-pace", fd, e)
+ fairness(fd, e, 2, F_PACE);
+ test_each_engine_store("fair-sync", fd, e)
+ fairness(fd, e, 2, F_SYNC);
+ test_each_engine_store("fair-flow", fd, e)
+ fairness(fd, e, 2, F_PACE | F_FLOW);
+ test_each_engine_store("fair-half", fd, e)
+ fairness(fd, e, 2, F_PACE | F_HALF);
+ test_each_engine_store("fair-solo", fd, e)
+ fairness(fd, e, 2, F_SYNC | F_SOLO);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-02 0:26 Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-02 0:26 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 418 +++++++++++++++++++++++++++++++++
1 file changed, 418 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..d1121ecd2 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,417 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915)
+{ /* Query I915_PARAM_CS_TIMESTAMP_FREQUENCY through GETPARAM on the DRM fd. */
+ int freq = 0; /* returned unchanged (0) if the ioctl fills nothing in */
+ drm_i915_getparam_t query = {
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ .value = &freq,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query); /* return code is not checked */
+ return freq;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y) /* returns ceil(x / y); y must be non-zero */
+{
+ return x / y + (x % y != 0); /* avoids (x + y - 1), which wraps when the sum exceeds UINT64_MAX */
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{ /* Convert nanoseconds to timestamp ticks, rounding up. */
+ const uint64_t hz = read_timestamp_frequency(i915);
+ return div64_u64_round_up(ns * hz, NSEC_PER_SEC);
+}
+
+static uint64_t ticks_to_ns(int i915, uint64_t ticks)
+{ /* Convert timestamp ticks to nanoseconds, rounding up; divides by the reported frequency. */
+ const uint64_t hz = read_timestamp_frequency(i915);
+ return div64_u64_round_up(ticks * NSEC_PER_SEC, hz);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) /* opcode goes in bits 23 and up, flags/length in the low bits */
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) /* header for x ALU dwords that follow */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0) /* second operand unused; MI_MATH_INSTR needs all three arguments */
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0) /* second operand unused; MI_MATH_INSTR needs all three arguments */
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void delay(int i915, /* DRM fd; intent: write a batch that busy-loops on the engine for roughly 'ns' */
+ const struct intel_execution_engine2 *e, /* engine whose mmio base locates CS_GPR and TIMESTAMP */
+ uint32_t handle, /* GEM object that is mapped and filled with the batch */
+ uint64_t addr, /* GPU address of that object; used for the scratch slot and the loop target */
+ uint64_t ns) /* requested delay, converted with ns_to_ticks() */
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; /* 1 on gen8+: added to command lengths for the upper address dword */
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x)) /* register offset of GPR x; GPRs are spaced 8 bytes apart */
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS }; /* indices of the two GPRs used below */
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base); /* skip when no mmio base is reported for this engine */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* START_TS upper dword = 0 */
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* START_TS lower dword = TIMESTAMP */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ if (offset_in_page(cs) & 4) /* pad with a zero dword so the loop target is 8-byte aligned */
+ *cs++ = 0;
+ jmp = cs; /* loop entry; the MI_BATCH_BUFFER_START at the end jumps back here */
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* NOW_TS upper dword = 0 */
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* NOW_TS lower dword = TIMESTAMP */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ *cs++ = MI_MATH(4); /* NOW_TS = ~(NOW_TS - START_TS) */
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000; /* scratch dword at byte 4000 of this same object */
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b); /* compare the scratch dword with the inverted tick target */
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b; /* not finished: jump back to 'jmp' */
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2 /* returns the exec object describing the new delay batch, flagged as pinned */
+delay_create(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t target_ns) /* delay passed on to delay() */
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf); /* run the placeholder batch once; presumably this is what fills in obj.offset */
+ gem_sync(i915, obj.handle); /* wait before the object is rewritten below */
+
+ delay(i915, e, obj.handle, obj.offset, target_ns); /* write the delay loop for the address now in obj.offset */
+
+ obj.flags |= EXEC_OBJECT_PINNED; /* later submissions keep using obj.offset, which the batch has baked in */
+ return obj;
+}
+
+static void tslog(int i915, /* DRM fd; writes a batch that stores CS_TIMESTAMP into the object and then advances its own target address */
+ const struct intel_execution_engine2 *e, /* engine whose mmio base locates CS_GPR and CS_TIMESTAMP */
+ uint32_t handle, /* GEM object that is mapped and filled */
+ uint64_t addr) /* GPU address of that object */
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; /* 1 on gen8+: added to the SRM length field */
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define CS_TIMESTAMP (base + 0x358)
+ enum { ONE, MASK, ADDR }; /* GPR indices: increment, wrap mask, current log address */
+ uint32_t *timestamp_lo, *addr_lo;
+ uint32_t *map, *cs;
+
+ igt_require(base); /* skip when no mmio base is reported for this engine */
+
+ map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+ cs = map + 512; /* commands start at dword 512, i.e. byte offset 2048 */
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_TIMESTAMP;
+ timestamp_lo = cs; /* remember this address dword; the batch rewrites it at the end */
+ *cs++ = addr;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* ADDR lower dword = current log address */
+ *cs++ = CS_GPR(ADDR);
+ addr_lo = cs; /* remember this immediate; the batch rewrites it at the end */
+ *cs++ = addr;
+ *cs++ = MI_LOAD_REGISTER_IMM; /* ADDR upper dword */
+ *cs++ = CS_GPR(ADDR) + 4;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* ONE = 4: step of one uint32_t slot */
+ *cs++ = CS_GPR(ONE);
+ *cs++ = 4;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(ONE) + 4;
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* MASK = all ones except bit 11 (0x800) */
+ *cs++ = CS_GPR(MASK);
+ *cs++ = 0xfffff7ff;
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(MASK) + 4;
+ *cs++ = 0xffffffff;
+
+ *cs++ = MI_MATH(8); /* ADDR = (ADDR + ONE) & MASK; assuming addr is page aligned the log wraps after 2048 bytes */
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ONE));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_ADD;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(ADDR));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(MASK));
+ *cs++ = MI_MATH_AND;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(ADDR), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(timestamp_lo); /* patch the first SRM's target with the advanced address */
+ *cs++ = addr >> 32;
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(ADDR);
+ *cs++ = addr + offset_in_page(addr_lo); /* patch the LRI immediate so the next run starts from the advanced address */
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2 /* returns the exec object holding the timestamp-logging batch, flagged as pinned */
+tslog_create(int i915, uint32_t ctx, const struct intel_execution_engine2 *e)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf); /* run the placeholder batch once; presumably this is what fills in obj.offset */
+ gem_sync(i915, obj.handle); /* wait before the object is rewritten below */
+
+ tslog(i915, e, obj.handle, obj.offset); /* write the logging batch for the address now in obj.offset */
+
+ obj.flags |= EXEC_OBJECT_PINNED; /* later submissions keep using obj.offset, which the batch has baked in */
+ return obj;
+}
+
+static int cmp_u32(const void *A, const void *B) /* qsort() comparator: ascending order of uint32_t elements */
+{
+ const uint32_t *a = A, *b = B; /* must match the 4-byte elements being sorted; unsigned long reads 8 bytes on LP64 */
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static void fair_child(int i915, uint32_t ctx, /* one client: submits frames until *ctl is set, then reports its median frame interval */
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns, /* GPU time per frame, split evenly across the batches of a frame */
+ int timeout, /* not used in this function */
+ int timeline, /* sw_sync timeline used for in-fences when F_EXTERNAL is set */
+ unsigned int flags, /* F_PACING and/or F_EXTERNAL */
+ unsigned long *ctl, /* stop flag polled at the top of every frame */
+ unsigned long *out) /* receives the median interval in ns */
+#define F_PACING 0x1
+#define F_EXTERNAL 0x2
+{
+ const int batches_per_frame = 3;
+ struct drm_i915_gem_exec_object2 prev =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ delay_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 ts = tslog_create(i915, ctx, e);
+ struct timespec tv = {};
+ unsigned long count = 0; /* frames submitted so far */
+ int p_fence = -1, n_fence = -1; /* out-fences of the previous and the newest frame */
+ uint32_t *map;
+ int n;
+
+ igt_nsec_elapsed(&tv);
+ while (!READ_ONCE(*ctl)) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .rsvd2 = -1,
+ .flags = e->flags,
+ };
+
+ if (flags & F_EXTERNAL) { /* gate this frame on timeline point 'count' */
+ execbuf.rsvd2 =
+ sw_sync_timeline_create_fence(timeline, count);
+ execbuf.flags |= I915_EXEC_FENCE_IN;
+ }
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32; /* out-fence fd comes back in the upper 32 bits */
+ execbuf.flags &= ~(I915_EXEC_FENCE_OUT | I915_EXEC_FENCE_IN);
+ for (n = 1; n < batches_per_frame; n++) /* remaining delay batches of this frame, without fences */
+ gem_execbuf(i915, &execbuf);
+
+ execbuf.buffers_ptr = to_user_pointer(&ts); /* finish the frame with the timestamp logger */
+ execbuf.batch_start_offset = 2048; /* tslog() writes its commands from byte 2048 */
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACING && p_fence != -1) { /* wait for the previous frame's fence before queuing more */
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+ close(execbuf.rsvd2); /* truncated to int: presumably the in-fence fd kept in the low 32 bits (or -1) */
+
+ igt_swap(prev, next); /* alternate between the two delay batches */
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ gem_sync(i915, prev.handle);
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+
+ map = gem_mmap__device_coherent(i915, ts.handle, 0, 4096, PROT_WRITE);
+ for (n = 1; n < min(count, 512); n++) /* turn up to 512 logged timestamps into consecutive differences */
+ map[n - 1] = map[n] - map[n - 1];
+ qsort(map, --n, sizeof(*map), cmp_u32); /* n is now the number of differences */
+ *out = ticks_to_ns(i915, map[n / 2]); /* median difference, converted to ns */
+ munmap(map, 4096);
+
+ gem_close(i915, ts.handle);
+}
+
+static int cmp_ul(const void *A, const void *B) /* qsort() comparator: ascending unsigned long */
+{
+ const unsigned long lhs = *(const unsigned long *)A, rhs = *(const unsigned long *)B;
+
+ if (lhs < rhs)
+ return -1;
+ if (lhs > rhs)
+ return 1;
+
+ return 0;
+}
+
+static void fairness(int i915, /* fork 1, 3, 7 and 15 clients on engine 'e' and report the spread of their median frame intervals */
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags) /* timeout: seconds of frames to drive; flags: forwarded to fair_child() */
+{
+ const int frame_ns = 16666 * 1000; /* 16.666 ms per frame */
+ unsigned long *result;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+ igt_require(gem_class_has_mutable_submission(i915, e->class));
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); /* shared with the forked children; NOTE(review): not checked against MAP_FAILED */
+
+ for (int n = 2; n <= 16; n <<= 1) {
+ int timeline = sw_sync_timeline_create();
+ int nframes = timeout * NSEC_PER_SEC / frame_ns + 1;
+ const int nchild = n - 1; /* odd for easy medians */
+ const int lo = nchild / 4; /* index of the lower quartile after sorting */
+ const int hi = (3 * nchild + 3) / 4 - 1; /* index of the upper quartile after sorting */
+ struct igt_mean m;
+
+ memset(result, 0, (nchild + 1) * sizeof(result[0])); /* slots [0, nchild) are results, slot [nchild] is the stop flag */
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+ fair_child(i915, ctx, e, frame_ns / nchild, /* each client gets an equal share of one frame */
+ timeout, timeline, flags,
+ &result[nchild],
+ &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+
+ while (nframes--) { /* advance the timeline once per frame period */
+ struct timespec tv = { .tv_nsec = frame_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+ }
+ result[nchild] = 1; /* tell the children to stop */
+ for (int child = 0; child < nchild; child++) {
+ while (!READ_ONCE(result[child])) { /* keep the timeline moving until this child has reported */
+ struct timespec tv = { .tv_nsec = frame_ns };
+ nanosleep(&tv, NULL);
+ sw_sync_timeline_inc(timeline, 1);
+ }
+ }
+ igt_waitchildren();
+ close(timeline);
+
+ igt_mean_init(&m);
+ for (int child = 0; child < nchild; child++)
+ igt_mean_add(&m, result[child]);
+
+ qsort(result, nchild, sizeof(*result), cmp_ul);
+ igt_info("%d clients, range: [%.1f, %.1f], iqr: [%.1f, %.1f], median: %.1f, mean: %.1f ± %.2f ms\n",
+ nchild,
+ 1e-6 * result[0], 1e-6 * result[nchild - 1],
+ 1e-6 * result[lo], 1e-6 * result[hi],
+ 1e-6 * result[nchild / 2],
+ 1e-6 * igt_mean_get(&m),
+ 1e-6 * sqrt(igt_mean_get_variance(&m)));
+
+#if 0
+ /* Mean within 10% of target */
+ igt_assert(10 * igt_mean_get(&m) > 9 * frame_ns &&
+ 9 * igt_mean_get(&m) < 10 * frame_ns);
+
+ /* Variance [inter-quartile range] is less than 33% of median */
+ igt_assert(3 * (result[hi] - result[lo]) < result[nchild / 2]);
+#endif
+ }
+
+ munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +3000,13 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fair-none", fd, e)
+ fairness(fd, e, 2, 0);
+ test_each_engine_store("fair-pace", fd, e)
+ fairness(fd, e, 2, F_PACING);
+ test_each_engine_store("fair-sync", fd, e)
+ fairness(fd, e, 2, F_PACING | F_EXTERNAL);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
2020-06-01 19:08 Chris Wilson
2020-06-01 19:53 ` Chris Wilson
@ 2020-06-01 21:17 ` Chris Wilson
1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-01 21:17 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 253 +++++++++++++++++++++++++++++++++
1 file changed, 253 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..d58d926b1 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,254 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915)
+{ /* Query I915_PARAM_CS_TIMESTAMP_FREQUENCY through GETPARAM on the DRM fd. */
+ int freq = 0; /* returned unchanged (0) if the ioctl fills nothing in */
+ drm_i915_getparam_t query = {
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ .value = &freq,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query); /* return code is not checked */
+ return freq;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y) /* returns ceil(x / y); y must be non-zero */
+{
+ return x / y + (x % y != 0); /* avoids (x + y - 1), which wraps when the sum exceeds UINT64_MAX */
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{ /* Convert nanoseconds to timestamp ticks, rounding up. */
+ const uint64_t hz = read_timestamp_frequency(i915);
+ return div64_u64_round_up(ns * hz, 1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) /* opcode goes in bits 23 and up, flags/length in the low bits */
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) /* header for x ALU dwords that follow */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0) /* second operand unused; MI_MATH_INSTR needs all three arguments */
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0) /* second operand unused; MI_MATH_INSTR needs all three arguments */
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915, /* DRM fd; intent: write a batch that busy-loops on the engine for roughly 'ns' */
+ const struct intel_execution_engine2 *e, /* engine whose mmio base locates CS_GPR and TIMESTAMP */
+ uint32_t handle, /* GEM object that is mapped and filled with the batch */
+ uint64_t addr, /* GPU address of that object; used for the scratch slot and the loop target */
+ uint64_t ns) /* requested delay, converted with ns_to_ticks() */
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; /* 1 on gen8+: added to command lengths for the upper address dword */
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x)) /* register offset of GPR x; GPRs are spaced 8 bytes apart */
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS }; /* indices of the two GPRs used below */
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base); /* skip when no mmio base is reported for this engine */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* START_TS upper dword = 0 */
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* START_TS lower dword = TIMESTAMP */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ if (offset_in_page(cs) & 4) /* pad with a zero dword so the loop target is 8-byte aligned */
+ *cs++ = 0;
+ jmp = cs; /* loop entry; the MI_BATCH_BUFFER_START at the end jumps back here */
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* NOW_TS upper dword = 0 */
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* NOW_TS lower dword = TIMESTAMP */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ *cs++ = MI_MATH(4); /* NOW_TS = ~(NOW_TS - START_TS) */
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000; /* scratch dword at byte 4000 of this same object */
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b); /* compare the scratch dword with the inverted tick target */
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b; /* not finished: jump back to 'jmp' */
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2 /* returns the exec object describing the new delay batch, flagged as pinned */
+timed_create(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t target_ns) /* delay passed on to async_delay() */
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf); /* run the placeholder batch once; presumably this is what fills in obj.offset */
+ gem_sync(i915, obj.handle); /* wait before the object is rewritten below */
+
+ async_delay(i915, e, obj.handle, obj.offset, target_ns); /* write the delay loop for the address now in obj.offset */
+
+ obj.flags |= EXEC_OBJECT_PINNED; /* later submissions keep using obj.offset, which the batch has baked in */
+ return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx, /* one client: submits frames for 'timeout' seconds, then reports the average wall time per frame */
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns, /* GPU time per frame, split evenly across the batches of a frame */
+ int timeout, /* loop duration handed to igt_until_timeout() */
+ unsigned int flags, /* F_PACING: wait for the previous frame's fence each iteration */
+ unsigned long *out) /* receives elapsed ns divided by the frame count */
+#define F_PACING 0x1
+{
+ const int batches_per_frame = 3;
+ struct drm_i915_gem_exec_object2 prev =
+ timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct timespec tv = {};
+ unsigned long count = 0; /* frames submitted so far */
+ int p_fence = -1, n_fence = -1; /* out-fences of the previous and the newest frame */
+
+ igt_nsec_elapsed(&tv); /* start the clock */
+ igt_until_timeout(timeout) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32; /* out-fence fd comes back in the upper 32 bits */
+ execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+ for (int n = 1; n < batches_per_frame; n++) /* remaining batches of this frame, without a fence */
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACING && p_fence != -1) { /* wait for the previous frame's fence before queuing more */
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ igt_swap(prev, next); /* alternate between the two delay batches */
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ gem_sync(i915, prev.handle); /* wait for the last submitted frame before stopping the clock */
+ *out = igt_nsec_elapsed(&tv) / count;
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B) /* qsort() comparator: ascending unsigned long */
+{
+ const unsigned long lhs = *(const unsigned long *)A, rhs = *(const unsigned long *)B;
+
+ if (lhs < rhs)
+ return -1;
+ if (lhs > rhs)
+ return 1;
+
+ return 0;
+}
+
+static void fairness(int i915, /* fork 1, 3, 7 and 15 clients on engine 'e' and assert their per-frame times are close to frame_ns */
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags) /* both forwarded to fair_child() */
+{
+ const int frame_ns = 16666 * 1000; /* 16.666 ms per frame */
+ unsigned long *result;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); /* shared with the forked children; NOTE(review): not checked against MAP_FAILED */
+
+ for (int n = 2; n <= 16; n <<= 1) {
+ const int nchild = n - 1; /* odd for easy medians */
+ const int iqr_lo = nchild / 4; /* index of the lower quartile after sorting */
+ const int iqr_hi = (3 * nchild + 3) / 4 - 1; /* index of the upper quartile after sorting */
+ unsigned long iqr;
+
+ memset(result, 0, nchild * sizeof(result[0]));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+ fair_child(i915, ctx, e, frame_ns / nchild, /* each client gets an equal share of one frame */
+ timeout, flags, &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+ igt_waitchildren();
+
+ qsort(result, nchild, sizeof(*result), ul_cmp);
+ igt_info("%d clients, range: [%lu, %lu], iqr: [%lu, %lu], median: %lu\n",
+ nchild,
+ result[0], result[nchild - 1],
+ result[iqr_lo], result[iqr_hi],
+ result[nchild / 2]);
+
+ /* Median within 10% of target */
+ igt_assert(10 * result[nchild / 2] > 9 * frame_ns &&
+ 9 * result[nchild / 2] < 10 * frame_ns);
+
+ /* Variance [inter-quartile range] is less than 33% of median */
+ iqr = result[iqr_hi] - result[iqr_lo];
+ igt_assert(3 * iqr < result[nchild / 2]);
+ }
+
+ munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +2837,11 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fairness", fd, e)
+ fairness(fd, e, 3, F_PACING);
+ test_each_engine_store("unfairness", fd, e)
+ fairness(fd, e, 3, 0);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
2020-06-01 19:08 Chris Wilson
@ 2020-06-01 19:53 ` Chris Wilson
2020-06-01 21:17 ` Chris Wilson
1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-01 19:53 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 245 +++++++++++++++++++++++++++++++++
1 file changed, 245 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..5d91e94a3 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,246 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915)
+{ /* Query I915_PARAM_CS_TIMESTAMP_FREQUENCY through GETPARAM on the DRM fd. */
+ int freq = 0; /* returned unchanged (0) if the ioctl fills nothing in */
+ drm_i915_getparam_t query = {
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ .value = &freq,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query); /* return code is not checked */
+ return freq;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y) /* returns ceil(x / y); y must be non-zero */
+{
+ return x / y + (x % y != 0); /* avoids (x + y - 1), which wraps when the sum exceeds UINT64_MAX */
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{ /* Convert nanoseconds to timestamp ticks, rounding up. */
+ const uint64_t hz = read_timestamp_frequency(i915);
+ return div64_u64_round_up(ns * hz, 1000000000);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) /* opcode goes in bits 23 and up, flags/length in the low bits */
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) /* header for x ALU dwords that follow */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0) /* second operand unused; MI_MATH_INSTR needs all three arguments */
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0) /* second operand unused; MI_MATH_INSTR needs all three arguments */
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+static void async_delay(int i915, /* DRM fd; intent: write a batch that busy-loops on the engine for roughly 'ns' */
+ const struct intel_execution_engine2 *e, /* engine whose mmio base locates CS_GPR and TIMESTAMP */
+ uint32_t handle, /* GEM object that is mapped and filled with the batch */
+ uint64_t addr, /* GPU address of that object; used for the scratch slot and the loop target */
+ uint64_t ns) /* requested delay, converted with ns_to_ticks() */
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; /* 1 on gen8+: added to command lengths for the upper address dword */
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x)) /* register offset of GPR x; GPRs are spaced 8 bytes apart */
+#define TIMESTAMP (base + 0x3a8)
+ enum { START_TS, NOW_TS }; /* indices of the two GPRs used below */
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base); /* skip when no mmio base is reported for this engine */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* START_TS upper dword = 0 */
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* START_TS lower dword = TIMESTAMP */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS);
+
+ if (offset_in_page(cs) & 4) /* pad with a zero dword so the loop target is 8-byte aligned */
+ *cs++ = 0;
+ jmp = cs; /* loop entry; the MI_BATCH_BUFFER_START at the end jumps back here */
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* NOW_TS upper dword = 0 */
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* NOW_TS lower dword = TIMESTAMP */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ *cs++ = MI_MATH(4); /* NOW_TS = ~(NOW_TS - START_TS) */
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000; /* scratch dword at byte 4000 of this same object */
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b); /* compare the scratch dword with the inverted tick target */
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b; /* not finished: jump back to 'jmp' */
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+static struct drm_i915_gem_exec_object2 /* returns the exec object describing the new delay batch, flagged as pinned */
+timed_create(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t target_ns) /* delay passed on to async_delay() */
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915),
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ gem_execbuf(i915, &execbuf); /* run the placeholder batch once; presumably this is what fills in obj.offset */
+ gem_sync(i915, obj.handle); /* wait before the object is rewritten below */
+
+ async_delay(i915, e, obj.handle, obj.offset, target_ns); /* write the delay loop for the address now in obj.offset */
+
+ obj.flags |= EXEC_OBJECT_PINNED; /* later submissions keep using obj.offset, which the batch has baked in */
+ return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx, /* one client: submits frames for 'timeout' seconds, then reports the average wall time per frame */
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns, /* GPU time per frame, split evenly across the batches of a frame */
+ int timeout, /* loop duration handed to igt_until_timeout() */
+ unsigned int flags, /* F_PACING: wait for the previous frame's fence each iteration */
+ unsigned long *out) /* receives elapsed ns divided by the frame count */
+#define F_PACING 0x1
+{
+ const int batches_per_frame = 3;
+ struct drm_i915_gem_exec_object2 prev =
+ timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct timespec tv = {};
+ unsigned long count = 0; /* frames submitted so far */
+ int p_fence = -1, n_fence = -1; /* out-fences of the previous and the newest frame */
+
+ igt_nsec_elapsed(&tv); /* start the clock */
+ igt_until_timeout(timeout) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next),
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ execbuf.flags |= I915_EXEC_FENCE_OUT;
+ gem_execbuf_wr(i915, &execbuf);
+ n_fence = execbuf.rsvd2 >> 32; /* out-fence fd comes back in the upper 32 bits */
+ execbuf.flags &= ~I915_EXEC_FENCE_OUT;
+ for (int n = 1; n < batches_per_frame; n++) /* remaining batches of this frame, without a fence */
+ gem_execbuf(i915, &execbuf);
+
+ if (flags & F_PACING && p_fence != -1) { /* wait for the previous frame's fence before queuing more */
+ struct pollfd pfd = {
+ .fd = p_fence,
+ .events = POLLIN,
+ };
+ poll(&pfd, 1, -1);
+ }
+ close(p_fence);
+
+ igt_swap(prev, next); /* alternate between the two delay batches */
+ igt_swap(p_fence, n_fence);
+ count++;
+ }
+ gem_sync(i915, prev.handle); /* wait for the last submitted frame before stopping the clock */
+ *out = igt_nsec_elapsed(&tv) / count;
+ close(p_fence);
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B) /* qsort() comparator: ascending unsigned long */
+{
+ const unsigned long lhs = *(const unsigned long *)A, rhs = *(const unsigned long *)B;
+
+ if (lhs < rhs)
+ return -1;
+ if (lhs > rhs)
+ return 1;
+
+ return 0;
+}
+
+static void fairness(int i915, /* fork 1, 3, 7 and 15 clients on engine 'e' and assert their per-frame times agree */
+ const struct intel_execution_engine2 *e,
+ int timeout, unsigned int flags) /* both forwarded to fair_child() */
+{
+ const int frame_ns = 16666 * 1000; /* 16.666 ms per frame */
+ unsigned long *result;
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); /* shared with the forked children; NOTE(review): not checked against MAP_FAILED */
+
+ for (int n = 2; n <= 16; n <<= 1) {
+ int nchild = n - 1; /* odd for easy medians */
+
+ memset(result, 0, nchild * sizeof(result[0]));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0);
+
+
+ fair_child(i915, ctx, e, frame_ns / nchild, /* each client gets an equal share of one frame */
+ timeout, flags, &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+ igt_waitchildren();
+
+ qsort(result, nchild, sizeof(*result), ul_cmp);
+ igt_info("%d clients, range: [%lu, %lu], median: %lu\n",
+ nchild, result[0], result[nchild-1], result[nchild/2]);
+
+ igt_assert(4 * result[0] > 3 * result[nchild-1]); /* fastest client takes more than 75% of the slowest client's time */
+ igt_assert(3 * result[0] < 4 * result[nchild-1]); /* always holds after the ascending sort when the maximum is non-zero */
+
+ igt_assert(4 * result[nchild/2] > 3 * frame_ns); /* median above 75% of the frame target */
+ igt_assert(3 * result[nchild/2] < 4 * frame_ns); /* median below 133% of the frame target */
+ }
+
+ munmap(result, 4096);
+}
+
#define test_each_engine(T, i915, e) \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +2829,11 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fairness", fd, e)
+ fairness(fd, e, 3, F_PACING);
+ test_each_engine_store("unfairness", fd, e)
+ fairness(fd, e, 3, 0);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness
@ 2020-06-01 19:08 Chris Wilson
2020-06-01 19:53 ` Chris Wilson
2020-06-01 21:17 ` Chris Wilson
0 siblings, 2 replies; 16+ messages in thread
From: Chris Wilson @ 2020-06-01 19:08 UTC (permalink / raw)
To: intel-gfx; +Cc: igt-dev, Chris Wilson
An important property for multi-client systems is that each client gets
a 'fair' allotment of system time. (Where fairness is at the whim of the
context properties, such as priorities.) This test forks N independent
clients (albeit they happen to share a single vm), and does an equal
amount of work in each client and asserts that they take an equal amount of
time.
Though we have never claimed to have a completely fair scheduler, that
is what is expected.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
tests/i915/gem_exec_schedule.c | 224 +++++++++++++++++++++++++++++++++
1 file changed, 224 insertions(+)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 56c638833..0ec21bf54 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -2495,6 +2495,227 @@ static void measure_semaphore_power(int i915)
rapl_close(&pkg);
}
+static int read_timestamp_frequency(int i915) /* Query I915_PARAM_CS_TIMESTAMP_FREQUENCY on the DRM fd i915 and return it (presumably ticks per second, since ns_to_ticks() divides by 1e9 -- confirm). */
+{
+ int value = 0; /* returned unchanged if the ioctl below writes nothing */
+ drm_i915_getparam_t gp = {
+ .value = &value, /* the ioctl stores its answer through this pointer */
+ .param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+ };
+ ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp); /* NOTE(review): return value is not checked, so a failure is indistinguishable from a reported frequency of 0 */
+ return value;
+}
+
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y) /* Integer division of x by y, rounded up to the next whole quotient. */
+{
+ return (x + y - 1) / y; /* NOTE(review): x + y - 1 wraps when x is within y - 1 of UINT64_MAX, and y == 0 divides by zero; neither is guarded */
+}
+
+static uint64_t ns_to_ticks(int i915, uint64_t ns) /* Convert a duration in nanoseconds to timestamp ticks, rounding up, using the frequency from read_timestamp_frequency(). */
+{
+ return div64_u64_round_up(ns * read_timestamp_frequency(i915), /* 64-bit product with no overflow check; yields 0 ticks if the frequency query returned 0 */
+ 1000000000); /* nanoseconds per second */
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) /* command header: opcode shifted to bit 23, OR'ed with the low flag bits */
+
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) /* opcode 0x1a with (x) - 1 in the low bits; async_delay() passes 4 and follows it with four ALU dwords */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) /* one ALU dword: opcode at bit 20, op1 at bit 10, op2 at bit 0 */
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1) /* NOTE(review): passes 2 arguments to the 3-parameter MI_MATH_INSTR, so any use would fail to compile; presumably op2 should be 0 -- confirm. Unused in the code shown here. */
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1) /* NOTE(review): same 2-vs-3 argument mismatch as MI_MATH_LOAD0 */
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x) /* operand is the register index itself; async_delay() passes the same indices it uses with CS_GPR() */
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) /* opcode 0x2A, low bits 1; async_delay() follows it with two register offsets (TIMESTAMP, then a CS_GPR slot) */
+
+static void async_delay(int i915, /* Fill batch object `handle` with commands that record TIMESTAMP once, then loop: re-read it, compute the elapsed ticks, and conditionally end the batch against ns_to_ticks(ns). */
+ const struct intel_execution_engine2 *e, /* engine whose mmio base locates the registers used below */
+ uint32_t handle, /* GEM handle of the batch; its first 4096 bytes are mapped and written here */
+ uint64_t addr, /* GPU address the batch is expected to run from; used for the loop jump target and the scratch dword */
+ uint64_t ns) /* requested delay in nanoseconds */
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8; /* 1 on gen8+: bumps the command length fields below; NOTE(review): the upper address dwords are emitted unconditionally, so the stream only looks right when this is 1 (fairness() requires gen >= 8) */
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+#define CS_GPR(x) (base + 0x600 + 8 * (x)) /* mmio offset of slot x, 8 bytes per slot; not #undef'd in this function */
+#define TIMESTAMP (base + 0x3a8) /* mmio offset read as the engine timestamp; not #undef'd in this function */
+ enum { START_TS, NOW_TS }; /* the two CS_GPR slots used: loop start time and current time */
+ uint32_t *map, *cs, *jmp; /* map: start of the mapping; cs: write cursor; jmp: loop head */
+
+ igt_require(base); /* a zero mmio base fails this requirement */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* write immediate 0 ... */
+ *cs++ = CS_GPR(START_TS) + 4; /* ... to offset +4 of the START_TS slot (presumably its upper 32 bits) */
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG; /* copy TIMESTAMP ... */
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(START_TS); /* ... into the START_TS slot */
+
+ if (offset_in_page(cs) & 4) /* cursor is only 4-byte aligned: */
+ *cs++ = 0; /* emit one zero dword of padding so the loop head is 8-byte aligned */
+ jmp = cs; /* loop head; the MI_BATCH_BUFFER_START at the end jumps back here */
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ *cs++ = MI_LOAD_REGISTER_IMM; /* same two-step load as above, now into the NOW_TS slot */
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = TIMESTAMP;
+ *cs++ = CS_GPR(NOW_TS);
+
+ *cs++ = MI_MATH(4); /* header for the four ALU dwords that follow */
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); /* SRCA = NOW_TS */
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); /* SRCB = START_TS */
+ *cs++ = MI_MATH_SUB; /* presumably ACCU = SRCA - SRCB, i.e. elapsed ticks -- confirm */
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU); /* NOW_TS = inverted ACCU, i.e. ~elapsed */
+
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS); /* register to store: ~elapsed */
+ *cs++ = addr + 4000; /* low 32 bits of the scratch dword, byte 4000 of the batch (past the commands written here) */
+ *cs++ = addr >> 32; /* upper 32 bits of the scratch address */
+
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b); /* presumably ends the batch once elapsed reaches the target -- confirm the compare direction against the command definition */
+ *cs++ = ~ns_to_ticks(i915, ns); /* compare value: inverted target tick count, truncated to 32 bits by the store */
+ *cs++ = addr + 4000; /* compared against the scratch dword written by the SRM above */
+ *cs++ = addr >> 32;
+
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b; /* otherwise jump back to the loop head; bit 8 presumably selects the address space -- confirm */
+ *cs++ = addr + offset_in_page(jmp); /* loop head address: batch address plus jmp's offset within the page */
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096); /* drop the CPU mapping; the commands stay in the object */
+}
+
+static struct drm_i915_gem_exec_object2 /* Create a batch object, run it once on ctx/e, then fill it with an async_delay() loop of target_ns; returns the exec object with EXEC_OBJECT_PINNED set. */
+timed_create(int i915, uint32_t ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t target_ns)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = batch_create(i915), /* helper defined elsewhere in the test file */
+ .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ };
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&obj),
+ .buffer_count = 1,
+ .rsvd1 = ctx, /* context to execute in */
+ .flags = e->flags, /* engine selector */
+ };
+
+ gem_execbuf(i915, &execbuf); /* first submission; presumably lets the kernel fill in obj.offset -- confirm */
+ gem_sync(i915, obj.handle); /* wait for it to finish before the batch contents are rewritten */
+
+ async_delay(i915, e, obj.handle, obj.offset, target_ns); /* the delay loop embeds obj.offset as absolute addresses */
+
+ obj.flags |= EXEC_OBJECT_PINNED; /* set only after the commands were written against obj.offset; presumably keeps later execbufs at that address */
+ return obj;
+}
+
+static void fair_child(int i915, uint32_t ctx, /* One client: submit frames of timed batches on ctx/e until the timeout, then store the average wall-clock nanoseconds per frame in *out. */
+ const struct intel_execution_engine2 *e,
+ uint64_t frame_ns, /* GPU time requested per frame, split evenly across the batches of a frame */
+ int timeout, /* passed to igt_until_timeout() */
+ unsigned long *out) /* receives elapsed nanoseconds divided by the frame count */
+{
+ const int batches_per_frame = 3;
+ struct drm_i915_gem_exec_object2 prev =
+ timed_create(i915, ctx, e, frame_ns / batches_per_frame);
+ struct drm_i915_gem_exec_object2 next =
+ timed_create(i915, ctx, e, frame_ns / batches_per_frame); /* two identical delay batches, used alternately */
+ struct timespec tv = {};
+ unsigned long count = 0; /* frames submitted */
+
+ igt_nsec_elapsed(&tv); /* result discarded; presumably starts the clock on the zeroed tv -- confirm */
+ igt_until_timeout(timeout) {
+ struct drm_i915_gem_execbuffer2 execbuf = {
+ .buffers_ptr = to_user_pointer(&next), /* whichever object is currently `next`; the two are swapped every iteration */
+ .buffer_count = 1,
+ .rsvd1 = ctx,
+ .flags = e->flags,
+ };
+
+ for (int n = 0; n < batches_per_frame; n++)
+ gem_execbuf(i915, &execbuf); /* one frame = batches_per_frame submissions of the same batch */
+
+ gem_sync(i915, prev.handle); /* wait on the other object, i.e. the frame submitted one iteration earlier */
+ igt_swap(prev, next);
+ count++;
+ }
+ gem_sync(i915, prev.handle); /* after the final swap, prev is the last frame submitted */
+ *out = igt_nsec_elapsed(&tv) / count; /* NOTE(review): divides by zero if the loop body never ran */
+
+ gem_close(i915, next.handle);
+ gem_close(i915, prev.handle);
+}
+
+static int ul_cmp(const void *A, const void *B) /* Three-way comparator over unsigned long elements, ascending; passed to qsort() in fairness(). */
+{
+ const unsigned long *a = A, *b = B;
+
+ if (*a < *b) /* explicit comparisons rather than a subtraction, which could wrap or truncate to int */
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+static void fairness(int i915, /* Fork 1, 3, 7 and 15 clients in turn on engine e, each running fair_child(), and assert that per-client frame times are close to each other and to frame_ns. */
+ const struct intel_execution_engine2 *e,
+ int timeout) /* passed through to fair_child() for every client */
+{
+ const int frame_ns = 16666 * 1000; /* 16.666 ms in nanoseconds, roughly 1/60 s */
+ unsigned long *result; /* one slot per child, in memory shared with the forked children */
+
+ igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+
+ result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); /* NOTE(review): not checked against MAP_FAILED, mapped PROT_WRITE only although it is read below, and never munmap()ed in this function */
+
+ for (int n = 2; n <= 16; n <<= 1) {
+ int nchild = n - 1; /* odd for easy medians */
+
+ memset(result, 0, nchild * sizeof(result[0]));
+ igt_fork(child, nchild) {
+ uint32_t ctx = gem_context_clone_with_engines(i915, 0); /* each child gets its own context, cloned from context 0 */
+
+
+ fair_child(i915, ctx, e, frame_ns / nchild, /* each client asks for 1/nchild of a frame, so all clients together request frame_ns per frame */
+ timeout, &result[child]);
+
+ gem_context_destroy(i915, ctx);
+ }
+ igt_waitchildren();
+
+ qsort(result, nchild, sizeof(*result), ul_cmp); /* ascending: [0] is the fastest client, [nchild-1] the slowest, [nchild/2] the median */
+ igt_info("%d clients, range: [%lu, %lu], median: %lu\n",
+ nchild, result[0], result[nchild-1], result[nchild/2]);
+
+ igt_assert(4 * result[0] > 3 * result[nchild-1]); /* fastest client is more than 3/4 of the slowest */
+ igt_assert(3 * result[0] < 4 * result[nchild-1]); /* NOTE(review): after sorting result[0] <= result[nchild-1], so this can only fail when both are 0 */
+
+ igt_assert(4 * result[nchild/2] > 3 * frame_ns); /* median frame time above 3/4 of frame_ns */
+ igt_assert(3 * result[nchild/2] < 4 * frame_ns); /* median frame time below 4/3 of frame_ns */
+ }
+}
+
#define test_each_engine(T, i915, e) /* expands to a dynamic subtest named T, a loop over physical engines of i915 bound to e, and one igt_dynamic_f() entry per engine name; presumably the statement written after the macro becomes the per-engine body */ \
igt_subtest_with_dynamic(T) __for_each_physical_engine(i915, e) \
igt_dynamic_f("%s", e->name)
@@ -2589,6 +2810,9 @@ igt_main
test_each_engine_store("promotion", fd, e)
promotion(fd, e->flags);
+ test_each_engine_store("fairness", fd, e)
+ fairness(fd, e, 3);
+
igt_subtest_group {
igt_fixture {
igt_require(gem_scheduler_has_preemption(fd));
--
2.27.0.rc2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
end of thread, other threads:[~2020-12-16 20:37 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-16 15:24 [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-12-16 15:24 ` [igt-dev] " Chris Wilson
2020-12-16 17:34 ` [igt-dev] ✓ Fi.CI.BAT: success for i915/gem_exec_schedule: Try to spot unfairness (rev14) Patchwork
2020-12-16 20:37 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
-- strict thread matches above, loose matches on Subject: below --
2020-12-10 2:09 [Intel-gfx] [PATCH i-g-t] i915/gem_exec_schedule: Try to spot unfairness Chris Wilson
2020-11-24 23:39 Chris Wilson
2020-08-03 13:57 Chris Wilson
2020-06-22 19:08 Chris Wilson
2020-06-09 12:45 Chris Wilson
2020-06-02 8:22 Chris Wilson
2020-06-02 8:32 ` Chris Wilson
2020-06-02 8:50 ` Chris Wilson
2020-06-02 0:26 Chris Wilson
2020-06-01 19:08 Chris Wilson
2020-06-01 19:53 ` Chris Wilson
2020-06-01 21:17 ` Chris Wilson
This is an external index of several public inboxes;
see mirroring instructions on how to clone and mirror
all data and code used by this external index.