All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH i-g-t] gem_wsim: Use CTX_TIMESTAMP for timed spinners
@ 2020-07-14 12:36 ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2020-07-14 12:36 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Using MI_MATH and MI_COND_BBE, we can construct a loop that runs for a
precise number of clock cycles, as measured by the CTX_TIMESTAMP. We use
the CTX_TIMESTAMP (as opposed to the CS_TIMESTAMP) so that the elapsed
time is measured local to the context, and the length of the batch is
unaffected by preemption. Since the clock ticks at a known frequency, we
can directly translate the batch durations into cycles and so remove the
requirement for nop calibration, and the often excessively large nop
batches.

The downside is that we need to use engine-local registers. Before gen11
the CS has no support for relative mmio, so this technique cannot support
transparent load balancing on a virtual engine before Icelake.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 benchmarks/gem_wsim.c | 512 ++++++++++++++----------------------------
 1 file changed, 170 insertions(+), 342 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index dbb46b9aa..b6e2f8adb 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -176,10 +176,9 @@ struct w_step
 
 	struct drm_i915_gem_execbuffer2 eb;
 	struct drm_i915_gem_exec_object2 *obj;
-	struct drm_i915_gem_relocation_entry reloc[1];
-	unsigned long bb_sz;
+	struct drm_i915_gem_relocation_entry reloc[3];
 	uint32_t bb_handle;
-	uint32_t *recursive_bb_start;
+	uint32_t *bb_duration;
 };
 
 struct ctx {
@@ -227,9 +226,7 @@ struct workload
 	unsigned int nrequest[NUM_ENGINES];
 };
 
-static const unsigned int nop_calibration_us = 1000;
-static bool has_nop_calibration = false;
-static bool sequential = true;
+static int ts_frequency;
 
 static unsigned int master_prng;
 
@@ -253,59 +250,58 @@ static const char *ring_str_map[NUM_ENGINES] = {
 	[VECS] = "VECS",
 };
 
-/* stores calibrations for particular engines */
-static unsigned long engine_calib_map[NUM_ENGINES];
-
-static enum intel_engine_id
-ci_to_engine_id(int class, int instance)
-{
-	static const struct {
-		int class;
-		int instance;
-		unsigned int id;
-	} map[] = {
-		{ I915_ENGINE_CLASS_RENDER, 0, RCS },
-		{ I915_ENGINE_CLASS_COPY, 0, BCS },
-		{ I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
-		{ I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
-		{ I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
-		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
 	};
-
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(map); i++) {
-		if (class == map[i].class && instance == map[i].instance)
-			return map[i].id;
-	}
-	return -1;
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
 }
 
-static void
-apply_unset_calibrations(unsigned long raw_number)
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
 {
-	for (int i = 0; i < NUM_ENGINES; i++)
-		engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
+	return (x + y - 1) / y;
 }
 
-static void
-print_engine_calibrations(void)
+static uint64_t ns_to_ticks(uint64_t ns)
 {
-	bool first_entry = true;
+	return div64_u64_round_up(ns * ts_frequency, 1000000000);
+}
 
-	printf("Nop calibration for %uus delay is: ", nop_calibration_us);
-	for (int i = 0; i < NUM_ENGINES; i++) {
-		/* skip engines not present and DEFAULT and VCS */
-		if (i != DEFAULT && i != VCS && engine_calib_map[i]) {
-			if (first_entry) {
-				printf("%s=%lu", ring_str_map[i], engine_calib_map[i]);
-				first_entry = false;
-			} else {
-				printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]);
-			}
-		}
-	}
-	printf("\n");
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+#define   MI_CS_MMIO (1 << 19)
+
+static unsigned int offset_in_page(void *addr)
+{
+	return (uintptr_t)addr & 4095;
 }
 
 static void add_dep(struct deps *deps, struct dep_entry entry)
@@ -1392,91 +1388,121 @@ __get_ctx(struct workload *wrk, const struct w_step *w)
 	return &wrk->ctx_list[w->context];
 }
 
-static unsigned long
-__get_bb_sz(const struct w_step *w, unsigned int duration)
+static uint32_t mmio_base(enum intel_engine_id engine, int gen)
 {
-	enum intel_engine_id engine = w->engine;
-	struct ctx *ctx = __get_ctx(w->wrk, w);
-	unsigned long d;
-
-	if (ctx->engine_map && engine == DEFAULT)
-		/* Assume first engine calibration. */
-		engine = ctx->engine_map[0];
+	uint32_t mmio = 0;
 
-	igt_assert(engine_calib_map[engine]);
-	d = ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
-		  nop_calibration_us,
-		  sizeof(uint32_t));
-
-	return d;
-}
-
-static unsigned long
-get_bb_sz(const struct w_step *w, unsigned int duration)
-{
-	unsigned long d = __get_bb_sz(w, duration);
-
-	igt_assert(d);
-
-	return d;
-}
+	if (gen >= 11) /* use relative mmio */
+		return 0;
 
-static void init_bb(struct w_step *w)
-{
-	const unsigned int arb_period =
-			__get_bb_sz(w, w->preempt_us) / sizeof(uint32_t);
-	const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
-	unsigned int i;
-	uint32_t *ptr;
+	switch (engine) {
+	case NUM_ENGINES:
+		break;
 
-	if (w->unbound_duration || !arb_period)
-		return;
+	case DEFAULT:
+	case RCS:
+		mmio = 0x2000;
+		break;
 
-	gem_set_domain(fd, w->bb_handle,
-		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+	case BCS:
+		mmio = 0x22000;
+		break;
 
-	ptr = gem_mmap__wc(fd, w->bb_handle, 0, mmap_len, PROT_WRITE);
+	case VCS:
+	case VCS1:
+	case VCS2: /* XXX */
+		if (gen < 6)
+			mmio = 0x4000;
+		else if (gen < 11)
+			mmio = 0x12000;
+		else
+			mmio = 0x1c0000;
+		break;
 
-	for (i = arb_period; i < w->bb_sz / sizeof(uint32_t); i += arb_period)
-		ptr[i] = 0x5 << 23; /* MI_ARB_CHK */
+	case VECS:
+		if (gen < 11)
+			mmio = 0x1a000;
+		else
+			mmio = 0x1c8000;
+		break;
+	}
 
-	munmap(ptr, mmap_len);
+	return mmio;
 }
 
-static unsigned int terminate_bb(struct w_step *w)
+static unsigned int create_bb(struct w_step *w, int self)
 {
-	const uint32_t bbe = 0xa << 23;
-	unsigned long mmap_start, mmap_len;
-	unsigned long batch_start = w->bb_sz;
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const uint32_t base = mmio_base(w->engine, gen);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	const int use_64b = gen >= 8;
+	enum { START_TS, NOW_TS };
+	uint32_t *ptr, *cs, *jmp;
 	unsigned int r = 0;
-	uint32_t *ptr, *cs;
-
-	batch_start -= sizeof(uint32_t); /* bbend */
-
-	if (w->unbound_duration)
-		batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
-
-	mmap_start = rounddown(batch_start, PAGE_SIZE);
-	mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
 
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
-	cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
+	cs = ptr = gem_mmap__wc(fd, w->bb_handle, 0, 4096, PROT_WRITE);
 
-	if (w->unbound_duration) {
-		w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
-		batch_start += 4 * sizeof(uint32_t);
+	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
 
-		*cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
-		w->recursive_bb_start = cs;
-		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	if (offset_in_page(cs) & 4)
 		*cs++ = 0;
+	jmp = cs;
+
+	if (w->preempt_us)
+		*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b) | MI_CS_MMIO; /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = 4000;
+	*cs++ = 0;
+	r++;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_INSTR(0x21, 1);
+		*cs++ = 2048;
 		*cs++ = 0;
 	}
 
-	*cs = bbe;
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	w->bb_duration = cs;
+	*cs++ = 0;
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = 4000;
+	*cs++ = 0;
+	r++;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = offset_in_page(jmp);
+	*cs++ = 0;
+	r++;
 
 	return r;
 }
@@ -1590,23 +1616,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
 		igt_assert(j < nr_obj);
 	}
 
-	if (w->unbound_duration)
-		/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
-		w->bb_sz = max(PAGE_SIZE, __get_bb_sz(w, w->preempt_us)) +
-			   (1 + 3) * sizeof(uint32_t);
-	else
-		w->bb_sz = get_bb_sz(w, w->duration.max);
-
-	w->bb_handle = w->obj[j].handle =
-		alloc_bo(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
-	init_bb(w);
-	w->obj[j].relocation_count = terminate_bb(w);
-
-	if (w->obj[j].relocation_count) {
-		igt_assert(w->unbound_duration);
-		w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
-		w->reloc[0].target_handle = j;
-	}
+	w->bb_handle = w->obj[j].handle = gem_create(fd, 4096);
+	w->obj[j].relocation_count = create_bb(w, j);
+	igt_assert(w->obj[j].relocation_count <= ARRAY_SIZE(w->reloc));
+	w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
 
 	w->eb.buffers_ptr = to_user_pointer(w->obj);
 	w->eb.buffer_count = j + 1;
@@ -1617,8 +1630,8 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
 	printf("%u: %u:|", w->idx, w->eb.buffer_count);
 	for (i = 0; i <= j; i++)
 		printf("%x|", w->obj[i].handle);
-	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
-		w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
+	printf(" flags=%llx bb=%x[%u] ctx[%u]=%u\n",
+		w->eb.flags, w->bb_handle, j, w->context,
 		get_ctxid(wrk, w));
 #endif
 }
@@ -1803,7 +1816,7 @@ static void measure_active_set(struct workload *wrk)
 		if (w->type != BATCH)
 			continue;
 
-		batch_sizes += w->bb_sz;
+		batch_sizes += 4096;
 
 		for (j = 0; j < w->data_deps.nr; j++) {
 			struct dep_entry *dep = &w->data_deps.list[j];
@@ -1904,6 +1917,10 @@ static int prepare_workload(unsigned int id, struct workload *wrk)
 					wsim_err("Load balancing needs an engine map!\n");
 					return 1;
 				}
+				if (intel_gen(intel_get_drm_devid(fd)) < 11) {
+					wsim_err("Load balancing needs relative mmio support, gen11+!\n");
+					return 1;
+				}
 				ctx->load_balance = w->load_balance;
 			} else if (w->type == BOND) {
 				if (!ctx->load_balance) {
@@ -2163,15 +2180,17 @@ static int elapsed_us(const struct timespec *start, const struct timespec *end)
 }
 
 static void
-update_bb_start(struct w_step *w)
+update_bb_start(struct workload *wrk, struct w_step *w)
 {
-	if (!w->unbound_duration)
-		return;
+	uint32_t ticks;
 
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	*w->recursive_bb_start = MI_BATCH_BUFFER_START | (1 << 8) | 1;
+	ticks = 0;
+	if (!w->unbound_duration)
+		ticks = ~ns_to_ticks(1000 * get_duration(wrk, w));
+	*w->bb_duration = ticks;
 }
 
 static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
@@ -2198,13 +2217,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine)
 	unsigned int i;
 
 	eb_update_flags(wrk, w, engine);
-	update_bb_start(w);
-
-	w->eb.batch_start_offset =
-		w->unbound_duration ?
-		0 :
-		ALIGN(w->bb_sz - get_bb_sz(w, get_duration(wrk, w)),
-		      2 * sizeof(uint32_t));
+	update_bb_start(wrk, w);
 
 	for (i = 0; i < w->fence_deps.nr; i++) {
 		int tgt = w->idx + w->fence_deps.list[i].target;
@@ -2353,8 +2366,7 @@ static void *run_workload(void *data)
 				igt_assert(wrk->steps[t_idx].type == BATCH);
 				igt_assert(wrk->steps[t_idx].unbound_duration);
 
-				*wrk->steps[t_idx].recursive_bb_start =
-					MI_BATCH_BUFFER_END;
+				*wrk->steps[t_idx].bb_duration = 0xffffffff;
 				__sync_synchronize();
 				continue;
 			} else if (w->type == SSEU) {
@@ -2467,131 +2479,15 @@ static void fini_workload(struct workload *wrk)
 	free(wrk);
 }
 
-static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
-{
-	const uint32_t bbe = 0xa << 23;
-	unsigned int loops = 17;
-	unsigned int usecs = nop_calibration_us;
-	struct drm_i915_gem_exec_object2 obj = {};
-	struct drm_i915_gem_execbuffer2 eb = {
-		.buffer_count = 1,
-		.buffers_ptr = (uintptr_t)&obj,
-		.flags = engine->flags
-	};
-	long size, last_size;
-	struct timespec t_0, t_end;
-
-	clock_gettime(CLOCK_MONOTONIC, &t_0);
-
-	size = 256 * 1024;
-	do {
-		struct timespec t_start;
-
-		obj.handle = alloc_bo(fd, size);
-		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
-			  sizeof(bbe));
-		gem_execbuf(fd, &eb);
-		gem_sync(fd, obj.handle);
-
-		clock_gettime(CLOCK_MONOTONIC, &t_start);
-		for (int loop = 0; loop < loops; loop++)
-			gem_execbuf(fd, &eb);
-		gem_sync(fd, obj.handle);
-		clock_gettime(CLOCK_MONOTONIC, &t_end);
-
-		gem_close(fd, obj.handle);
-
-		last_size = size;
-		size = loops * size / elapsed(&t_start, &t_end) / 1e6 * usecs;
-		size = ALIGN(size, sizeof(uint32_t));
-	} while (elapsed(&t_0, &t_end) < 5 ||
-		 labs(size - last_size) > (size * tolerance_pct / 100));
-
-	return size / sizeof(uint32_t);
-}
-
-static void
-calibrate_sequentially(void)
-{
-	struct intel_execution_engine2 *engine;
-	enum intel_engine_id eng_id;
-
-	__for_each_physical_engine(fd, engine) {
-		eng_id = ci_to_engine_id(engine->class, engine->instance);
-		igt_assert(eng_id >= 0);
-		engine_calib_map[eng_id] = calibrate_nop(fd, engine);
-	}
-}
-
-struct thread_data {
-	struct intel_execution_engine2 *eng;
-	pthread_t thr;
-	unsigned long calib;
-};
-
-static void *
-engine_calibration_thread(void *data)
-{
-	struct thread_data *thr_d = (struct thread_data *) data;
-
-	thr_d->calib = calibrate_nop(fd, thr_d->eng);
-	return NULL;
-}
-
-static void
-calibrate_in_parallel(void)
-{
-	struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
-	struct intel_execution_engine2 *engine;
-	enum intel_engine_id id;
-	int ret;
-
-	__for_each_physical_engine(fd, engine) {
-		id = ci_to_engine_id(engine->class, engine->instance);
-		thr_d[id].eng = engine;
-		ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
-		igt_assert_eq(ret, 0);
-	}
-
-	__for_each_physical_engine(fd, engine) {
-		id = ci_to_engine_id(engine->class, engine->instance);
-		igt_assert(id >= 0);
-
-		ret = pthread_join(thr_d[id].thr, NULL);
-		igt_assert_eq(ret, 0);
-		engine_calib_map[id] = thr_d[id].calib;
-	}
-
-	free(thr_d);
-}
-
-static void
-calibrate_engines(void)
-{
-	if (sequential)
-		calibrate_sequentially();
-	else
-		calibrate_in_parallel();
-}
-
 static void print_help(void)
 {
 	puts(
 "Usage: gem_wsim [OPTIONS]\n"
 "\n"
 "Runs a simulated workload on the GPU.\n"
-"When ran without arguments performs a GPU calibration result of which needs to\n"
-"be provided when running the simulation in subsequent invocations.\n"
-"\n"
 "Options:\n"
 "  -h                This text.\n"
 "  -q                Be quiet - do not output anything to stdout.\n"
-"  -n <n |           Nop calibration value - single value is set to all engines\n"
-"  e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
-"                    particular engines.\n"
-"  -t <n>            Nop calibration tolerance percentage.\n"
-"  -T                Disable sequential calibration and perform calibration in parallel.\n"
-"                    Use when there is a difficulty obtaining calibration with the\n"
 "                    default settings.\n"
 "  -I <n>            Initial randomness seed.\n"
 "  -p <n>            Context priority to use for the following workload on the\n"
@@ -2671,17 +2567,12 @@ int main(int argc, char **argv)
 	int master_workload = -1;
 	char *append_workload_arg = NULL;
 	struct w_arg *w_args = NULL;
-	unsigned int tolerance_pct = 1;
 	int exitcode = EXIT_FAILURE;
 	double scale_time = 1.0f;
 	double scale_dur = 1.0f;
 	int prio = 0;
 	double t;
 	int i, c;
-	char *subopts, *value;
-	int raw_number = 0;
-	long calib_val;
-	int eng;
 
 	/*
 	 * Open the device via the low-level API so we can do the GPU quiesce
@@ -2721,70 +2612,7 @@ int main(int argc, char **argv)
 		case 'c':
 			clients = strtol(optarg, NULL, 0);
 			break;
-		case 't':
-			tolerance_pct = strtol(optarg, NULL, 0);
-			break;
-		case 'T':
-			sequential = false;
-			break;
-
-		case 'n':
-			subopts = optarg;
-			while (*subopts != '\0') {
-				eng = getsubopt(&subopts, (char **)ring_str_map, &value);
-				if (!value) {
-					/* only engine name was given */
-					wsim_err("Missing calibration value for '%s'!\n",
-						ring_str_map[eng]);
-					goto err;
-				}
 
-				calib_val = atol(value);
-
-				if (eng >= 0 && eng < NUM_ENGINES) {
-				/* engine name with some value were given */
-
-					if (eng == DEFAULT || eng == VCS) {
-						wsim_err("'%s' not allowed in engine calibrations!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else if (calib_val <= 0) {
-						wsim_err("Invalid calibration for engine '%s' - value "
-						"is either non-positive or is not a number!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else if (engine_calib_map[eng]) {
-						wsim_err("Invalid repeated calibration of '%s'!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else {
-						engine_calib_map[eng] = calib_val;
-						if (eng == RCS)
-							engine_calib_map[DEFAULT] = calib_val;
-						else if (eng == VCS1 || eng == VCS2)
-							engine_calib_map[VCS] = calib_val;
-						has_nop_calibration = true;
-					}
-				} else {
-					/* raw number was given */
-
-					if (!calib_val) {
-						wsim_err("Invalid engine or zero calibration!\n");
-						goto err;
-					} else if (calib_val < 0) {
-						wsim_err("Invalid negative calibration!\n");
-						goto err;
-					} else if (raw_number) {
-						wsim_err("Default engine calibration provided more than once!\n");
-						goto err;
-					} else {
-						raw_number = calib_val;
-						apply_unset_calibrations(raw_number);
-						has_nop_calibration = true;
-					}
-				}
-			}
-			break;
 		case 'r':
 			repeat = strtol(optarg, NULL, 0);
 			break;
@@ -2812,6 +2640,9 @@ int main(int argc, char **argv)
 		case 'F':
 			scale_time = atof(optarg);
 			break;
+		case 'n':
+			/* ignored; using HW timers */
+			break;
 		case 'h':
 			print_help();
 			goto out;
@@ -2820,17 +2651,15 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (!has_nop_calibration) {
-		if (verbose > 1) {
-			printf("Calibrating nop delays with %u%% tolerance...\n",
-				tolerance_pct);
-		}
-
-		calibrate_engines();
+	if (intel_gen(intel_get_drm_devid(fd)) < 8) {
+		wsim_err("gen8+ is required for CTX_TIMESTAMP timers\n");
+		return 1;
+	}
 
-		if (verbose)
-			print_engine_calibrations();
-		goto out;
+	ts_frequency = read_timestamp_frequency(fd);
+	if (!ts_frequency) {
+		wsim_err("Unable to read timestamp frequency of GPU\n");
+		goto err;
 	}
 
 	if (!nr_w_args) {
@@ -2885,7 +2714,6 @@ int main(int argc, char **argv)
 
 	if (verbose > 1) {
 		printf("Random seed is %u.\n", master_prng);
-		print_engine_calibrations();
 		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
 	}
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [igt-dev] [PATCH i-g-t] gem_wsim: Use CTX_TIMESTAMP for timed spinners
@ 2020-07-14 12:36 ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2020-07-14 12:36 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev, Chris Wilson, Tvrtko Ursulin

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Using MI_MATH and MI_COND_BBE, we can construct a loop that runs for a
precise number of clock cycles, as measured by the CTX_TIMESTAMP. We use
the CTX_TIMESTAMP (as opposed to the CS_TIMESTAMP) so that the elapsed
time is measured local to the context, and the length of the batch is
unaffected by preemption. Since the clock ticks at a known frequency, we
can directly translate the batch durations into cycles and so remove the
requirement for nop calibration, and the often excessively large nop
batches.

The downside is that we need to use engine-local registers. Before gen11
the CS has no support for relative mmio, so this technique cannot support
transparent load balancing on a virtual engine before Icelake.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 benchmarks/gem_wsim.c | 512 ++++++++++++++----------------------------
 1 file changed, 170 insertions(+), 342 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index dbb46b9aa..b6e2f8adb 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -176,10 +176,9 @@ struct w_step
 
 	struct drm_i915_gem_execbuffer2 eb;
 	struct drm_i915_gem_exec_object2 *obj;
-	struct drm_i915_gem_relocation_entry reloc[1];
-	unsigned long bb_sz;
+	struct drm_i915_gem_relocation_entry reloc[3];
 	uint32_t bb_handle;
-	uint32_t *recursive_bb_start;
+	uint32_t *bb_duration;
 };
 
 struct ctx {
@@ -227,9 +226,7 @@ struct workload
 	unsigned int nrequest[NUM_ENGINES];
 };
 
-static const unsigned int nop_calibration_us = 1000;
-static bool has_nop_calibration = false;
-static bool sequential = true;
+static int ts_frequency;
 
 static unsigned int master_prng;
 
@@ -253,59 +250,58 @@ static const char *ring_str_map[NUM_ENGINES] = {
 	[VECS] = "VECS",
 };
 
-/* stores calibrations for particular engines */
-static unsigned long engine_calib_map[NUM_ENGINES];
-
-static enum intel_engine_id
-ci_to_engine_id(int class, int instance)
-{
-	static const struct {
-		int class;
-		int instance;
-		unsigned int id;
-	} map[] = {
-		{ I915_ENGINE_CLASS_RENDER, 0, RCS },
-		{ I915_ENGINE_CLASS_COPY, 0, BCS },
-		{ I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
-		{ I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
-		{ I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
-		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
 	};
-
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(map); i++) {
-		if (class == map[i].class && instance == map[i].instance)
-			return map[i].id;
-	}
-	return -1;
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
 }
 
-static void
-apply_unset_calibrations(unsigned long raw_number)
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
 {
-	for (int i = 0; i < NUM_ENGINES; i++)
-		engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
+	return (x + y - 1) / y;
 }
 
-static void
-print_engine_calibrations(void)
+static uint64_t ns_to_ticks(uint64_t ns)
 {
-	bool first_entry = true;
+	return div64_u64_round_up(ns * ts_frequency, 1000000000);
+}
 
-	printf("Nop calibration for %uus delay is: ", nop_calibration_us);
-	for (int i = 0; i < NUM_ENGINES; i++) {
-		/* skip engines not present and DEFAULT and VCS */
-		if (i != DEFAULT && i != VCS && engine_calib_map[i]) {
-			if (first_entry) {
-				printf("%s=%lu", ring_str_map[i], engine_calib_map[i]);
-				first_entry = false;
-			} else {
-				printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]);
-			}
-		}
-	}
-	printf("\n");
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+#define   MI_CS_MMIO (1 << 19)
+
+static unsigned int offset_in_page(void *addr)
+{
+	return (uintptr_t)addr & 4095;
 }
 
 static void add_dep(struct deps *deps, struct dep_entry entry)
@@ -1392,91 +1388,121 @@ __get_ctx(struct workload *wrk, const struct w_step *w)
 	return &wrk->ctx_list[w->context];
 }
 
-static unsigned long
-__get_bb_sz(const struct w_step *w, unsigned int duration)
+static uint32_t mmio_base(enum intel_engine_id engine, int gen)
 {
-	enum intel_engine_id engine = w->engine;
-	struct ctx *ctx = __get_ctx(w->wrk, w);
-	unsigned long d;
-
-	if (ctx->engine_map && engine == DEFAULT)
-		/* Assume first engine calibration. */
-		engine = ctx->engine_map[0];
+	uint32_t mmio = 0;
 
-	igt_assert(engine_calib_map[engine]);
-	d = ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
-		  nop_calibration_us,
-		  sizeof(uint32_t));
-
-	return d;
-}
-
-static unsigned long
-get_bb_sz(const struct w_step *w, unsigned int duration)
-{
-	unsigned long d = __get_bb_sz(w, duration);
-
-	igt_assert(d);
-
-	return d;
-}
+	if (gen >= 11) /* use relative mmio */
+		return 0;
 
-static void init_bb(struct w_step *w)
-{
-	const unsigned int arb_period =
-			__get_bb_sz(w, w->preempt_us) / sizeof(uint32_t);
-	const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
-	unsigned int i;
-	uint32_t *ptr;
+	switch (engine) {
+	case NUM_ENGINES:
+		break;
 
-	if (w->unbound_duration || !arb_period)
-		return;
+	case DEFAULT:
+	case RCS:
+		mmio = 0x2000;
+		break;
 
-	gem_set_domain(fd, w->bb_handle,
-		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+	case BCS:
+		mmio = 0x22000;
+		break;
 
-	ptr = gem_mmap__wc(fd, w->bb_handle, 0, mmap_len, PROT_WRITE);
+	case VCS:
+	case VCS1:
+	case VCS2: /* XXX */
+		if (gen < 6)
+			mmio = 0x4000;
+		else if (gen < 11)
+			mmio = 0x12000;
+		else
+			mmio = 0x1c0000;
+		break;
 
-	for (i = arb_period; i < w->bb_sz / sizeof(uint32_t); i += arb_period)
-		ptr[i] = 0x5 << 23; /* MI_ARB_CHK */
+	case VECS:
+		if (gen < 11)
+			mmio = 0x1a000;
+		else
+			mmio = 0x1c8000;
+		break;
+	}
 
-	munmap(ptr, mmap_len);
+	return mmio;
 }
 
-static unsigned int terminate_bb(struct w_step *w)
+static unsigned int create_bb(struct w_step *w, int self)
 {
-	const uint32_t bbe = 0xa << 23;
-	unsigned long mmap_start, mmap_len;
-	unsigned long batch_start = w->bb_sz;
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const uint32_t base = mmio_base(w->engine, gen);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	const int use_64b = gen >= 8;
+	enum { START_TS, NOW_TS };
+	uint32_t *ptr, *cs, *jmp;
 	unsigned int r = 0;
-	uint32_t *ptr, *cs;
-
-	batch_start -= sizeof(uint32_t); /* bbend */
-
-	if (w->unbound_duration)
-		batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
-
-	mmap_start = rounddown(batch_start, PAGE_SIZE);
-	mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
 
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
-	cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
+	cs = ptr = gem_mmap__wc(fd, w->bb_handle, 0, 4096, PROT_WRITE);
 
-	if (w->unbound_duration) {
-		w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
-		batch_start += 4 * sizeof(uint32_t);
+	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
 
-		*cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
-		w->recursive_bb_start = cs;
-		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	if (offset_in_page(cs) & 4)
 		*cs++ = 0;
+	jmp = cs;
+
+	if (w->preempt_us)
+		*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b) | MI_CS_MMIO; /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = 4000;
+	*cs++ = 0;
+	r++;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_INSTR(0x21, 1);
+		*cs++ = 2048;
 		*cs++ = 0;
 	}
 
-	*cs = bbe;
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	w->bb_duration = cs;
+	*cs++ = 0;
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = 4000;
+	*cs++ = 0;
+	r++;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = offset_in_page(jmp);
+	*cs++ = 0;
+	r++;
 
 	return r;
 }
@@ -1590,23 +1616,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
 		igt_assert(j < nr_obj);
 	}
 
-	if (w->unbound_duration)
-		/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
-		w->bb_sz = max(PAGE_SIZE, __get_bb_sz(w, w->preempt_us)) +
-			   (1 + 3) * sizeof(uint32_t);
-	else
-		w->bb_sz = get_bb_sz(w, w->duration.max);
-
-	w->bb_handle = w->obj[j].handle =
-		alloc_bo(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
-	init_bb(w);
-	w->obj[j].relocation_count = terminate_bb(w);
-
-	if (w->obj[j].relocation_count) {
-		igt_assert(w->unbound_duration);
-		w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
-		w->reloc[0].target_handle = j;
-	}
+	w->bb_handle = w->obj[j].handle = gem_create(fd, 4096);
+	w->obj[j].relocation_count = create_bb(w, j);
+	igt_assert(w->obj[j].relocation_count <= ARRAY_SIZE(w->reloc));
+	w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
 
 	w->eb.buffers_ptr = to_user_pointer(w->obj);
 	w->eb.buffer_count = j + 1;
@@ -1617,8 +1630,8 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
 	printf("%u: %u:|", w->idx, w->eb.buffer_count);
 	for (i = 0; i <= j; i++)
 		printf("%x|", w->obj[i].handle);
-	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
-		w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
+	printf(" flags=%llx bb=%x[%u] ctx[%u]=%u\n",
+		w->eb.flags, w->bb_handle, j, w->context,
 		get_ctxid(wrk, w));
 #endif
 }
@@ -1803,7 +1816,7 @@ static void measure_active_set(struct workload *wrk)
 		if (w->type != BATCH)
 			continue;
 
-		batch_sizes += w->bb_sz;
+		batch_sizes += 4096;
 
 		for (j = 0; j < w->data_deps.nr; j++) {
 			struct dep_entry *dep = &w->data_deps.list[j];
@@ -1904,6 +1917,10 @@ static int prepare_workload(unsigned int id, struct workload *wrk)
 					wsim_err("Load balancing needs an engine map!\n");
 					return 1;
 				}
+				if (intel_gen(intel_get_drm_devid(fd)) < 11) {
+					wsim_err("Load balancing needs relative mmio support, gen11+!\n");
+					return 1;
+				}
 				ctx->load_balance = w->load_balance;
 			} else if (w->type == BOND) {
 				if (!ctx->load_balance) {
@@ -2163,15 +2180,17 @@ static int elapsed_us(const struct timespec *start, const struct timespec *end)
 }
 
 static void
-update_bb_start(struct w_step *w)
+update_bb_start(struct workload *wrk, struct w_step *w)
 {
-	if (!w->unbound_duration)
-		return;
+	uint32_t ticks;
 
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	*w->recursive_bb_start = MI_BATCH_BUFFER_START | (1 << 8) | 1;
+	ticks = 0;
+	if (!w->unbound_duration)
+		ticks = ~ns_to_ticks(1000 * get_duration(wrk, w));
+	*w->bb_duration = ticks;
 }
 
 static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
@@ -2198,13 +2217,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine)
 	unsigned int i;
 
 	eb_update_flags(wrk, w, engine);
-	update_bb_start(w);
-
-	w->eb.batch_start_offset =
-		w->unbound_duration ?
-		0 :
-		ALIGN(w->bb_sz - get_bb_sz(w, get_duration(wrk, w)),
-		      2 * sizeof(uint32_t));
+	update_bb_start(wrk, w);
 
 	for (i = 0; i < w->fence_deps.nr; i++) {
 		int tgt = w->idx + w->fence_deps.list[i].target;
@@ -2353,8 +2366,7 @@ static void *run_workload(void *data)
 				igt_assert(wrk->steps[t_idx].type == BATCH);
 				igt_assert(wrk->steps[t_idx].unbound_duration);
 
-				*wrk->steps[t_idx].recursive_bb_start =
-					MI_BATCH_BUFFER_END;
+				*wrk->steps[t_idx].bb_duration = 0xffffffff;
 				__sync_synchronize();
 				continue;
 			} else if (w->type == SSEU) {
@@ -2467,131 +2479,15 @@ static void fini_workload(struct workload *wrk)
 	free(wrk);
 }
 
-static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
-{
-	const uint32_t bbe = 0xa << 23;
-	unsigned int loops = 17;
-	unsigned int usecs = nop_calibration_us;
-	struct drm_i915_gem_exec_object2 obj = {};
-	struct drm_i915_gem_execbuffer2 eb = {
-		.buffer_count = 1,
-		.buffers_ptr = (uintptr_t)&obj,
-		.flags = engine->flags
-	};
-	long size, last_size;
-	struct timespec t_0, t_end;
-
-	clock_gettime(CLOCK_MONOTONIC, &t_0);
-
-	size = 256 * 1024;
-	do {
-		struct timespec t_start;
-
-		obj.handle = alloc_bo(fd, size);
-		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
-			  sizeof(bbe));
-		gem_execbuf(fd, &eb);
-		gem_sync(fd, obj.handle);
-
-		clock_gettime(CLOCK_MONOTONIC, &t_start);
-		for (int loop = 0; loop < loops; loop++)
-			gem_execbuf(fd, &eb);
-		gem_sync(fd, obj.handle);
-		clock_gettime(CLOCK_MONOTONIC, &t_end);
-
-		gem_close(fd, obj.handle);
-
-		last_size = size;
-		size = loops * size / elapsed(&t_start, &t_end) / 1e6 * usecs;
-		size = ALIGN(size, sizeof(uint32_t));
-	} while (elapsed(&t_0, &t_end) < 5 ||
-		 labs(size - last_size) > (size * tolerance_pct / 100));
-
-	return size / sizeof(uint32_t);
-}
-
-static void
-calibrate_sequentially(void)
-{
-	struct intel_execution_engine2 *engine;
-	enum intel_engine_id eng_id;
-
-	__for_each_physical_engine(fd, engine) {
-		eng_id = ci_to_engine_id(engine->class, engine->instance);
-		igt_assert(eng_id >= 0);
-		engine_calib_map[eng_id] = calibrate_nop(fd, engine);
-	}
-}
-
-struct thread_data {
-	struct intel_execution_engine2 *eng;
-	pthread_t thr;
-	unsigned long calib;
-};
-
-static void *
-engine_calibration_thread(void *data)
-{
-	struct thread_data *thr_d = (struct thread_data *) data;
-
-	thr_d->calib = calibrate_nop(fd, thr_d->eng);
-	return NULL;
-}
-
-static void
-calibrate_in_parallel(void)
-{
-	struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
-	struct intel_execution_engine2 *engine;
-	enum intel_engine_id id;
-	int ret;
-
-	__for_each_physical_engine(fd, engine) {
-		id = ci_to_engine_id(engine->class, engine->instance);
-		thr_d[id].eng = engine;
-		ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
-		igt_assert_eq(ret, 0);
-	}
-
-	__for_each_physical_engine(fd, engine) {
-		id = ci_to_engine_id(engine->class, engine->instance);
-		igt_assert(id >= 0);
-
-		ret = pthread_join(thr_d[id].thr, NULL);
-		igt_assert_eq(ret, 0);
-		engine_calib_map[id] = thr_d[id].calib;
-	}
-
-	free(thr_d);
-}
-
-static void
-calibrate_engines(void)
-{
-	if (sequential)
-		calibrate_sequentially();
-	else
-		calibrate_in_parallel();
-}
-
 static void print_help(void)
 {
 	puts(
 "Usage: gem_wsim [OPTIONS]\n"
 "\n"
 "Runs a simulated workload on the GPU.\n"
-"When ran without arguments performs a GPU calibration result of which needs to\n"
-"be provided when running the simulation in subsequent invocations.\n"
-"\n"
 "Options:\n"
 "  -h                This text.\n"
 "  -q                Be quiet - do not output anything to stdout.\n"
-"  -n <n |           Nop calibration value - single value is set to all engines\n"
-"  e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
-"                    particular engines.\n"
-"  -t <n>            Nop calibration tolerance percentage.\n"
-"  -T                Disable sequential calibration and perform calibration in parallel.\n"
-"                    Use when there is a difficulty obtaining calibration with the\n"
 "                    default settings.\n"
 "  -I <n>            Initial randomness seed.\n"
 "  -p <n>            Context priority to use for the following workload on the\n"
@@ -2671,17 +2567,12 @@ int main(int argc, char **argv)
 	int master_workload = -1;
 	char *append_workload_arg = NULL;
 	struct w_arg *w_args = NULL;
-	unsigned int tolerance_pct = 1;
 	int exitcode = EXIT_FAILURE;
 	double scale_time = 1.0f;
 	double scale_dur = 1.0f;
 	int prio = 0;
 	double t;
 	int i, c;
-	char *subopts, *value;
-	int raw_number = 0;
-	long calib_val;
-	int eng;
 
 	/*
 	 * Open the device via the low-level API so we can do the GPU quiesce
@@ -2721,70 +2612,7 @@ int main(int argc, char **argv)
 		case 'c':
 			clients = strtol(optarg, NULL, 0);
 			break;
-		case 't':
-			tolerance_pct = strtol(optarg, NULL, 0);
-			break;
-		case 'T':
-			sequential = false;
-			break;
-
-		case 'n':
-			subopts = optarg;
-			while (*subopts != '\0') {
-				eng = getsubopt(&subopts, (char **)ring_str_map, &value);
-				if (!value) {
-					/* only engine name was given */
-					wsim_err("Missing calibration value for '%s'!\n",
-						ring_str_map[eng]);
-					goto err;
-				}
 
-				calib_val = atol(value);
-
-				if (eng >= 0 && eng < NUM_ENGINES) {
-				/* engine name with some value were given */
-
-					if (eng == DEFAULT || eng == VCS) {
-						wsim_err("'%s' not allowed in engine calibrations!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else if (calib_val <= 0) {
-						wsim_err("Invalid calibration for engine '%s' - value "
-						"is either non-positive or is not a number!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else if (engine_calib_map[eng]) {
-						wsim_err("Invalid repeated calibration of '%s'!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else {
-						engine_calib_map[eng] = calib_val;
-						if (eng == RCS)
-							engine_calib_map[DEFAULT] = calib_val;
-						else if (eng == VCS1 || eng == VCS2)
-							engine_calib_map[VCS] = calib_val;
-						has_nop_calibration = true;
-					}
-				} else {
-					/* raw number was given */
-
-					if (!calib_val) {
-						wsim_err("Invalid engine or zero calibration!\n");
-						goto err;
-					} else if (calib_val < 0) {
-						wsim_err("Invalid negative calibration!\n");
-						goto err;
-					} else if (raw_number) {
-						wsim_err("Default engine calibration provided more than once!\n");
-						goto err;
-					} else {
-						raw_number = calib_val;
-						apply_unset_calibrations(raw_number);
-						has_nop_calibration = true;
-					}
-				}
-			}
-			break;
 		case 'r':
 			repeat = strtol(optarg, NULL, 0);
 			break;
@@ -2812,6 +2640,9 @@ int main(int argc, char **argv)
 		case 'F':
 			scale_time = atof(optarg);
 			break;
+		case 'n':
+			/* ignored; using HW timers */
+			break;
 		case 'h':
 			print_help();
 			goto out;
@@ -2820,17 +2651,15 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (!has_nop_calibration) {
-		if (verbose > 1) {
-			printf("Calibrating nop delays with %u%% tolerance...\n",
-				tolerance_pct);
-		}
-
-		calibrate_engines();
+	if (intel_gen(intel_get_drm_devid(fd)) < 8) {
+		wsim_err("gen8+ is required for CTX_TIMESTAMP timers\n");
+		return 1;
+	}
 
-		if (verbose)
-			print_engine_calibrations();
-		goto out;
+	ts_frequency = read_timestamp_frequency(fd);
+	if (!ts_frequency) {
+		wsim_err("Unable to read timestamp frequency of GPU\n");
+		goto err;
 	}
 
 	if (!nr_w_args) {
@@ -2885,7 +2714,6 @@ int main(int argc, char **argv)
 
 	if (verbose > 1) {
 		printf("Random seed is %u.\n", master_prng);
-		print_engine_calibrations();
 		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
 	}
 
-- 
2.27.0

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for gem_wsim: Use CTX_TIMESTAMP for timed spinners
  2020-07-14 12:36 ` [igt-dev] " Chris Wilson
  (?)
@ 2020-07-14 13:09 ` Patchwork
  -1 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2020-07-14 13:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev


[-- Attachment #1.1: Type: text/plain, Size: 6498 bytes --]

== Series Details ==

Series: gem_wsim: Use CTX_TIMESTAMP for timed spinners
URL   : https://patchwork.freedesktop.org/series/79472/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8744 -> IGTPW_4762
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/index.html

Known issues
------------

  Here are the changes found in IGTPW_4762 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-tgl-u2:          [PASS][1] -> [FAIL][2] ([i915#1888])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-tgl-u2/igt@gem_exec_suspend@basic-s0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-tgl-u2/igt@gem_exec_suspend@basic-s0.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-tgl-y:           [PASS][3] -> [DMESG-WARN][4] ([i915#402]) +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-tgl-y/igt@gem_exec_suspend@basic-s3.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-tgl-y/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live@gem_contexts:
    - fi-tgl-u2:          [PASS][5] -> [INCOMPLETE][6] ([i915#2045])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-tgl-u2/igt@i915_selftest@live@gem_contexts.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-tgl-u2/igt@i915_selftest@live@gem_contexts.html

  
#### Possible fixes ####

  * igt@gem_flink_basic@basic:
    - fi-tgl-y:           [DMESG-WARN][7] ([i915#402]) -> [PASS][8] +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-tgl-y/igt@gem_flink_basic@basic.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-tgl-y/igt@gem_flink_basic@basic.html

  * igt@i915_module_load@reload:
    - fi-tgl-u2:          [DMESG-WARN][9] ([i915#402]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-tgl-u2/igt@i915_module_load@reload.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-tgl-u2/igt@i915_module_load@reload.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-bsw-kefka:       [DMESG-WARN][11] ([i915#1982]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-bsw-kefka/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_pm_rpm@module-reload:
    - fi-kbl-guc:         [FAIL][13] ([i915#579]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-kbl-guc/igt@i915_pm_rpm@module-reload.html

  * igt@kms_busy@basic@flip:
    - fi-tgl-y:           [DMESG-WARN][15] ([i915#1982]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-tgl-y/igt@kms_busy@basic@flip.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-tgl-y/igt@kms_busy@basic@flip.html

  * igt@kms_flip@basic-flip-vs-wf_vblank@c-edp1:
    - fi-icl-u2:          [DMESG-WARN][17] ([i915#1982]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-icl-u2/igt@kms_flip@basic-flip-vs-wf_vblank@c-edp1.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-icl-u2/igt@kms_flip@basic-flip-vs-wf_vblank@c-edp1.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-cml-s:           [DMESG-WARN][19] ([i915#1982]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-cml-s/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-cml-s/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html

  
#### Warnings ####

  * igt@gem_exec_suspend@basic-s0:
    - fi-kbl-x1275:       [DMESG-WARN][21] ([i915#1982] / [i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][22] ([i915#62] / [i915#92])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-kbl-x1275/igt@gem_exec_suspend@basic-s0.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-kbl-x1275/igt@gem_exec_suspend@basic-s0.html

  * igt@kms_force_connector_basic@force-edid:
    - fi-kbl-x1275:       [DMESG-WARN][23] ([i915#62] / [i915#92]) -> [DMESG-WARN][24] ([i915#62] / [i915#92] / [i915#95]) +3 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-kbl-x1275/igt@kms_force_connector_basic@force-edid.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-kbl-x1275/igt@kms_force_connector_basic@force-edid.html

  * igt@prime_vgem@basic-fence-flip:
    - fi-kbl-x1275:       [DMESG-WARN][25] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][26] ([i915#62] / [i915#92]) +3 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/fi-kbl-x1275/igt@prime_vgem@basic-fence-flip.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/fi-kbl-x1275/igt@prime_vgem@basic-fence-flip.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2045]: https://gitlab.freedesktop.org/drm/intel/issues/2045
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#579]: https://gitlab.freedesktop.org/drm/intel/issues/579
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (46 -> 39)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5735 -> IGTPW_4762

  CI-20190529: 20190529
  CI_DRM_8744: beb1c0b42c5368a48e782e5556be95c8332d28c6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4762: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/index.html
  IGT_5735: 21f8204e54c122e4a0f8ca4b59e4b2db8d1ba687 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/index.html

[-- Attachment #1.2: Type: text/html, Size: 8594 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for gem_wsim: Use CTX_TIMESTAMP for timed spinners
  2020-07-14 12:36 ` [igt-dev] " Chris Wilson
  (?)
  (?)
@ 2020-07-14 16:05 ` Patchwork
  -1 siblings, 0 replies; 7+ messages in thread
From: Patchwork @ 2020-07-14 16:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev


[-- Attachment #1.1: Type: text/plain, Size: 24009 bytes --]

== Series Details ==

Series: gem_wsim: Use CTX_TIMESTAMP for timed spinners
URL   : https://patchwork.freedesktop.org/series/79472/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8744_full -> IGTPW_4762_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_4762_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_4762_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_4762_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_dp_aux_dev:
    - shard-iclb:         [PASS][1] -> [DMESG-WARN][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-iclb6/igt@kms_dp_aux_dev.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-iclb7/igt@kms_dp_aux_dev.html

  
Known issues
------------

  Here are the changes found in IGTPW_4762_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@unwedge-stress:
    - shard-hsw:          [PASS][3] -> [TIMEOUT][4] ([i915#1958] / [i915#2119]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-hsw7/igt@gem_eio@unwedge-stress.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-hsw7/igt@gem_eio@unwedge-stress.html

  * igt@gem_eio@wait-wedge-10ms:
    - shard-apl:          [PASS][5] -> [DMESG-WARN][6] ([i915#1635] / [i915#95]) +30 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl1/igt@gem_eio@wait-wedge-10ms.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl7/igt@gem_eio@wait-wedge-10ms.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-apl:          [PASS][7] -> [DMESG-WARN][8] ([i915#1436] / [i915#1635] / [i915#716])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl7/igt@gen9_exec_parse@allowed-all.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl1/igt@gen9_exec_parse@allowed-all.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-0:
    - shard-glk:          [PASS][9] -> [DMESG-FAIL][10] ([i915#118] / [i915#95])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-glk1/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-glk8/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen:
    - shard-kbl:          [PASS][11] -> [DMESG-FAIL][12] ([i915#54] / [i915#95]) +2 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_cursor_crc@pipe-a-cursor-64x21-onscreen.html

  * igt@kms_cursor_edge_walk@pipe-b-256x256-left-edge:
    - shard-glk:          [PASS][13] -> [DMESG-WARN][14] ([i915#1982]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-glk8/igt@kms_cursor_edge_walk@pipe-b-256x256-left-edge.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-glk6/igt@kms_cursor_edge_walk@pipe-b-256x256-left-edge.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled:
    - shard-apl:          [PASS][15] -> [DMESG-FAIL][16] ([i915#1635] / [i915#54] / [i915#95])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl7/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-untiled.html

  * igt@kms_draw_crc@fill-fb:
    - shard-apl:          [PASS][17] -> [DMESG-FAIL][18] ([i915#1635] / [i915#95]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl1/igt@kms_draw_crc@fill-fb.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl1/igt@kms_draw_crc@fill-fb.html

  * igt@kms_flip@flip-vs-suspend@c-hdmi-a1:
    - shard-hsw:          [PASS][19] -> [INCOMPLETE][20] ([i915#2055])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-hsw2/igt@kms_flip@flip-vs-suspend@c-hdmi-a1.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-hsw1/igt@kms_flip@flip-vs-suspend@c-hdmi-a1.html

  * igt@kms_frontbuffer_tracking@fbc-1p-indfb-fliptrack:
    - shard-tglb:         [PASS][21] -> [DMESG-WARN][22] ([i915#1982]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-tglb8/igt@kms_frontbuffer_tracking@fbc-1p-indfb-fliptrack.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-tglb7/igt@kms_frontbuffer_tracking@fbc-1p-indfb-fliptrack.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc:
    - shard-tglb:         [PASS][23] -> [SKIP][24] ([i915#668]) +5 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-tglb2/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-tglb1/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-kbl:          [PASS][25] -> [DMESG-WARN][26] ([i915#93] / [i915#95]) +36 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl1/igt@kms_hdr@bpc-switch-suspend.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_plane_cursor@pipe-a-overlay-size-256:
    - shard-kbl:          [PASS][27] -> [DMESG-FAIL][28] ([i915#95])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl6/igt@kms_plane_cursor@pipe-a-overlay-size-256.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl3/igt@kms_plane_cursor@pipe-a-overlay-size-256.html

  * igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping:
    - shard-iclb:         [PASS][29] -> [DMESG-WARN][30] ([i915#1982]) +1 similar issue
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-iclb8/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-iclb2/igt@kms_plane_scaling@pipe-a-scaler-with-clipping-clamping.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-kbl:          [PASS][31] -> [DMESG-WARN][32] ([i915#180]) +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl6/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl7/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  * igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend:
    - shard-kbl:          [PASS][33] -> [INCOMPLETE][34] ([i915#155])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl2/igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend.html

  * igt@kms_vblank@pipe-b-ts-continuation-modeset-rpm:
    - shard-tglb:         [PASS][35] -> [DMESG-WARN][36] ([i915#402]) +3 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-tglb2/igt@kms_vblank@pipe-b-ts-continuation-modeset-rpm.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-tglb3/igt@kms_vblank@pipe-b-ts-continuation-modeset-rpm.html

  * igt@perf@blocking-parameterized:
    - shard-iclb:         [PASS][37] -> [FAIL][38] ([i915#1542])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-iclb1/igt@perf@blocking-parameterized.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-iclb1/igt@perf@blocking-parameterized.html

  * igt@perf@polling-parameterized:
    - shard-tglb:         [PASS][39] -> [FAIL][40] ([i915#1542])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-tglb8/igt@perf@polling-parameterized.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-tglb1/igt@perf@polling-parameterized.html

  
#### Possible fixes ####

  * igt@gem_exec_fence@syncobj-unused-fence:
    - shard-apl:          [DMESG-WARN][41] ([i915#1635] / [i915#95]) -> [PASS][42] +33 similar issues
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl4/igt@gem_exec_fence@syncobj-unused-fence.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl8/igt@gem_exec_fence@syncobj-unused-fence.html

  * igt@gem_exec_whisper@basic-contexts-priority-all:
    - shard-glk:          [DMESG-WARN][43] ([i915#118] / [i915#95]) -> [PASS][44] +1 similar issue
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-glk4/igt@gem_exec_whisper@basic-contexts-priority-all.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-glk3/igt@gem_exec_whisper@basic-contexts-priority-all.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [DMESG-WARN][45] ([i915#1436] / [i915#716]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-glk3/igt@gen9_exec_parse@allowed-all.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-glk7/igt@gen9_exec_parse@allowed-all.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][47] ([i915#454]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-iclb6/igt@i915_pm_dc@dc6-psr.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-iclb5/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_big_fb@linear-64bpp-rotate-180:
    - shard-glk:          [DMESG-FAIL][49] ([i915#118] / [i915#95]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-glk8/igt@kms_big_fb@linear-64bpp-rotate-180.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-glk4/igt@kms_big_fb@linear-64bpp-rotate-180.html

  * igt@kms_color@pipe-a-ctm-blue-to-red:
    - shard-kbl:          [DMESG-WARN][51] ([i915#93] / [i915#95]) -> [PASS][52] +44 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl3/igt@kms_color@pipe-a-ctm-blue-to-red.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_color@pipe-a-ctm-blue-to-red.html

  * igt@kms_cursor_crc@pipe-a-cursor-256x256-random:
    - shard-kbl:          [DMESG-FAIL][53] ([i915#54] / [i915#95]) -> [PASS][54] +2 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-256x256-random.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-256x256-random.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][55] ([i915#180]) -> [PASS][56] +5 similar issues
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl3/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_frontbuffer_tracking@psr-rgb565-draw-mmap-gtt:
    - shard-tglb:         [DMESG-WARN][57] ([i915#1982]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-tglb1/igt@kms_frontbuffer_tracking@psr-rgb565-draw-mmap-gtt.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-tglb5/igt@kms_frontbuffer_tracking@psr-rgb565-draw-mmap-gtt.html

  * igt@kms_plane@plane-position-covered-pipe-b-planes:
    - shard-kbl:          [FAIL][59] ([i915#247]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl3/igt@kms_plane@plane-position-covered-pipe-b-planes.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_plane@plane-position-covered-pipe-b-planes.html
    - shard-apl:          [FAIL][61] ([i915#1635] / [i915#247]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl4/igt@kms_plane@plane-position-covered-pipe-b-planes.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl3/igt@kms_plane@plane-position-covered-pipe-b-planes.html

  * igt@kms_plane_cursor@pipe-a-viewport-size-128:
    - shard-apl:          [DMESG-FAIL][63] ([i915#1635] / [i915#95]) -> [PASS][64] +1 similar issue
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl6/igt@kms_plane_cursor@pipe-a-viewport-size-128.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl1/igt@kms_plane_cursor@pipe-a-viewport-size-128.html
    - shard-kbl:          [DMESG-FAIL][65] ([i915#95]) -> [PASS][66] +1 similar issue
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl3/igt@kms_plane_cursor@pipe-a-viewport-size-128.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl3/igt@kms_plane_cursor@pipe-a-viewport-size-128.html

  * igt@kms_psr@psr2_sprite_blt:
    - shard-iclb:         [SKIP][67] ([fdo#109441]) -> [PASS][68]
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-iclb4/igt@kms_psr@psr2_sprite_blt.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-iclb2/igt@kms_psr@psr2_sprite_blt.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [FAIL][69] ([i915#31]) -> [PASS][70]
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl4/igt@kms_setmode@basic.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl1/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-a-query-busy-hang:
    - shard-hsw:          [TIMEOUT][71] ([i915#1958] / [i915#2119]) -> [PASS][72]
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-hsw2/igt@kms_vblank@pipe-a-query-busy-hang.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-hsw7/igt@kms_vblank@pipe-a-query-busy-hang.html
    - shard-snb:          [TIMEOUT][73] ([i915#1958] / [i915#2119]) -> [PASS][74]
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-snb1/igt@kms_vblank@pipe-a-query-busy-hang.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-snb1/igt@kms_vblank@pipe-a-query-busy-hang.html

  * igt@kms_vblank@pipe-c-ts-continuation-dpms-suspend:
    - shard-iclb:         [INCOMPLETE][75] ([i915#1185]) -> [PASS][76]
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-iclb3/igt@kms_vblank@pipe-c-ts-continuation-dpms-suspend.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-iclb6/igt@kms_vblank@pipe-c-ts-continuation-dpms-suspend.html

  * igt@testdisplay:
    - shard-kbl:          [TIMEOUT][77] ([i915#1692] / [i915#1958] / [i915#2119]) -> [PASS][78]
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl2/igt@testdisplay.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl7/igt@testdisplay.html

  
#### Warnings ####

  * igt@gem_exec_balancer@full-late:
    - shard-hsw:          [SKIP][79] ([fdo#109271]) -> [TIMEOUT][80] ([i915#1958] / [i915#2119]) +2 similar issues
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-hsw7/igt@gem_exec_balancer@full-late.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-hsw2/igt@gem_exec_balancer@full-late.html

  * igt@gen9_exec_parse@bb-start-far:
    - shard-hsw:          [INCOMPLETE][81] ([i915#1958]) -> [SKIP][82] ([fdo#109271])
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-hsw2/igt@gen9_exec_parse@bb-start-far.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-hsw7/igt@gen9_exec_parse@bb-start-far.html
    - shard-snb:          [INCOMPLETE][83] ([i915#82]) -> [SKIP][84] ([fdo#109271])
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-snb1/igt@gen9_exec_parse@bb-start-far.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-snb6/igt@gen9_exec_parse@bb-start-far.html

  * igt@kms_content_protection@lic:
    - shard-kbl:          [TIMEOUT][85] ([i915#1319] / [i915#1958] / [i915#2119]) -> [TIMEOUT][86] ([i915#1319] / [i915#2119])
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl6/igt@kms_content_protection@lic.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl6/igt@kms_content_protection@lic.html

  * igt@kms_content_protection@uevent:
    - shard-kbl:          [FAIL][87] ([i915#2105]) -> [DMESG-FAIL][88] ([i915#95])
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl7/igt@kms_content_protection@uevent.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_content_protection@uevent.html
    - shard-apl:          [FAIL][89] ([i915#1635] / [i915#2105]) -> [DMESG-FAIL][90] ([i915#1635] / [i915#95])
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl2/igt@kms_content_protection@uevent.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl8/igt@kms_content_protection@uevent.html

  * igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-shrfb-draw-blt:
    - shard-snb:          [SKIP][91] ([fdo#109271]) -> [TIMEOUT][92] ([i915#1958] / [i915#2119]) +3 similar issues
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-snb5/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-shrfb-draw-blt.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-snb4/igt@kms_frontbuffer_tracking@fbc-2p-scndscrn-pri-shrfb-draw-blt.html

  * igt@kms_frontbuffer_tracking@psr-1p-pri-indfb-multidraw:
    - shard-tglb:         [DMESG-WARN][93] ([i915#1982]) -> [SKIP][94] ([i915#668])
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-tglb6/igt@kms_frontbuffer_tracking@psr-1p-pri-indfb-multidraw.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-tglb1/igt@kms_frontbuffer_tracking@psr-1p-pri-indfb-multidraw.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-apl:          [FAIL][95] ([fdo#108145] / [i915#1635] / [i915#265]) -> [DMESG-FAIL][96] ([fdo#108145] / [i915#1635] / [i915#95])
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl2/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl2/igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb:
    - shard-apl:          [DMESG-FAIL][97] ([fdo#108145] / [i915#1635] / [i915#95]) -> [FAIL][98] ([fdo#108145] / [i915#1635] / [i915#265])
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl6/igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb.html
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl1/igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb.html
    - shard-kbl:          [DMESG-FAIL][99] ([fdo#108145] / [i915#95]) -> [FAIL][100] ([fdo#108145] / [i915#265])
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl6/igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb.html
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl3/igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb:
    - shard-kbl:          [FAIL][101] ([i915#265]) -> [DMESG-FAIL][102] ([i915#95])
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-kbl1/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-kbl4/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html

  * igt@kms_plane_cursor@pipe-d-overlay-size-128:
    - shard-hsw:          [TIMEOUT][103] ([i915#1958] / [i915#2119]) -> [SKIP][104] ([fdo#109271]) +2 similar issues
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-hsw2/igt@kms_plane_cursor@pipe-d-overlay-size-128.html
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-hsw7/igt@kms_plane_cursor@pipe-d-overlay-size-128.html
    - shard-snb:          [TIMEOUT][105] ([i915#1958] / [i915#2119]) -> [SKIP][106] ([fdo#109271]) +2 similar issues
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-snb1/igt@kms_plane_cursor@pipe-d-overlay-size-128.html
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-snb1/igt@kms_plane_cursor@pipe-d-overlay-size-128.html

  * igt@runner@aborted:
    - shard-apl:          ([FAIL][107], [FAIL][108]) ([i915#1610] / [i915#1635] / [i915#2110]) -> ([FAIL][109], [FAIL][110]) ([fdo#109271] / [i915#1635] / [i915#2110] / [i915#716])
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl4/igt@runner@aborted.html
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8744/shard-apl4/igt@runner@aborted.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl1/igt@runner@aborted.html
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/shard-apl8/igt@runner@aborted.html

  
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1185]: https://gitlab.freedesktop.org/drm/intel/issues/1185
  [i915#1319]: https://gitlab.freedesktop.org/drm/intel/issues/1319
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#1610]: https://gitlab.freedesktop.org/drm/intel/issues/1610
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#1692]: https://gitlab.freedesktop.org/drm/intel/issues/1692
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1958]: https://gitlab.freedesktop.org/drm/intel/issues/1958
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2055]: https://gitlab.freedesktop.org/drm/intel/issues/2055
  [i915#2105]: https://gitlab.freedesktop.org/drm/intel/issues/2105
  [i915#2110]: https://gitlab.freedesktop.org/drm/intel/issues/2110
  [i915#2119]: https://gitlab.freedesktop.org/drm/intel/issues/2119
  [i915#247]: https://gitlab.freedesktop.org/drm/intel/issues/247
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#668]: https://gitlab.freedesktop.org/drm/intel/issues/668
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 8)
------------------------------

  Missing    (3): pig-skl-6260u pig-glk-j5005 pig-icl-1065g7 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5735 -> IGTPW_4762
  * Piglit: piglit_4509 -> None

  CI-20190529: 20190529
  CI_DRM_8744: beb1c0b42c5368a48e782e5556be95c8332d28c6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_4762: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/index.html
  IGT_5735: 21f8204e54c122e4a0f8ca4b59e4b2db8d1ba687 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_4762/index.html

[-- Attachment #1.2: Type: text/html, Size: 31775 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t] gem_wsim: Use CTX_TIMESTAMP for timed spinners
  2020-11-02 17:14 ` Tvrtko Ursulin
@ 2020-11-02 18:13   ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2020-11-02 18:13 UTC (permalink / raw)
  To: Tvrtko Ursulin, igt-dev; +Cc: intel-gfx

Quoting Tvrtko Ursulin (2020-11-02 17:14:24)
> 
> On 02/11/2020 15:33, Chris Wilson wrote:
> > +     if (!f) {
> > +             f = read_timestamp_frequency(fd);
> > +             if (intel_gen(intel_get_drm_devid(fd)) == 11)
> > +                     f = 12500000; /* icl!!! are you feeling alright? */
> 
> What does the comment refer to?

Icelake seems to be using a constant clock for the timestamp that is not
related to the GT clock. This feature is undocumented, but consistent
across all 3 of the icl I have access to. There's a selftest kicking
around for the issue.
 
> Should there be an assert here if < gen11?

CTX_TIMESTAMP is present on gen8+, and so long as we don't use a load
balancer, the tests should run. There's a bail for load-balancing on
pre-gen11 later on.

> > +     return gem_engine_mmio_base(i915, name);
> 
> Why is mmio base needed if relative addressing is used? Maybe I'll 
> figure it out after reading further.

If we are not using relative addressing, we need the mmio base (and the
bit is ignored).

If we are using relative addressing, the offset must be 0.

The intention is to keep working on older platforms, for simple tests at
least. (Quite useful for verifying the logic.)

> > -static unsigned int terminate_bb(struct w_step *w)
> > +static unsigned int create_bb(struct w_step *w, int self)
> >   {
> > -     const uint32_t bbe = 0xa << 23;
> > -     unsigned long mmap_start, mmap_len;
> > -     unsigned long batch_start = w->bb_sz;
> > +     const int gen = intel_gen(intel_get_drm_devid(fd));
> > +     const uint32_t base = mmio_base(fd, w->engine, gen);
> > +#define CS_GPR(x) (base + 0x600 + 8 * (x))
> > +#define TIMESTAMP (base + 0x3a8)
> > +     const int use_64b = gen >= 8;
> > +     enum { START_TS, NOW_TS };
> > +     uint32_t *ptr, *cs, *jmp;
> >       unsigned int r = 0;
> > -     uint32_t *ptr, *cs;
> > -
> > -     batch_start -= sizeof(uint32_t); /* bbend */
> > -
> > -     if (w->unbound_duration)
> > -             batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
> > -
> > -     mmap_start = rounddown(batch_start, PAGE_SIZE);
> > -     mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
> >   
> >       gem_set_domain(fd, w->bb_handle,
> >                      I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> >   
> > -     ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
> > -     cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
> > +     cs = ptr = gem_mmap__wc(fd, w->bb_handle, 0, 4096, PROT_WRITE);
> >   
> > -     if (w->unbound_duration) {
> > -             w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
> > -             batch_start += 4 * sizeof(uint32_t);
> > +     *cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
> > +     *cs++ = CS_GPR(START_TS) + 4;
> 
> What is "+ 4"?

The MI_MATH is 64b, so we need to do 2 dword writes into each GPR.

> > +     *cs++ = 0;
> > +     *cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
> > +     *cs++ = TIMESTAMP;
> > +     *cs++ = CS_GPR(START_TS);
> >   
> > -             *cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
> > -             w->recursive_bb_start = cs;
> > -             *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
> > +     if (offset_in_page(cs) & 4)
> >               *cs++ = 0;
> > +     jmp = cs;
> > +
> > +     if (w->preempt_us)
> > +             *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
> > +
> > +     *cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
> > +     *cs++ = CS_GPR(NOW_TS) + 4;
> > +     *cs++ = 0;
> > +     *cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
> > +     *cs++ = TIMESTAMP;
> > +     *cs++ = CS_GPR(NOW_TS);
> > +
> > +     *cs++ = MI_MATH(4);
> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
> > +     *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
> 
> MI_MATH_REG is aliased to CS_GPR?

Yes.

> > +     *cs++ = MI_MATH_SUB;
> > +     *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
> > +
> > +     *cs++ = 0x24 << 23 | (1 + use_64b) | MI_CS_MMIO_DST; /* SRM */
> 
> All others have nice defines but SRM, any special reason?

Once upon a time I was lazy, and since then copy'n'pasted.

> > +     *cs++ = CS_GPR(NOW_TS);
> > +     w->reloc[r].target_handle = self;
> > +     w->reloc[r].offset = offset_in_page(cs);
> > +     *cs++ = w->reloc[r].delta = 4000;
> > +     *cs++ = 0;
> > +     r++;
> > +
> > +     /* Delay between SRM and COND_BBE to post the writes */
> > +     for (int n = 0; n < 8; n++) {
> > +             *cs++ = MI_INSTR(0x21, 1);
> > +             *cs++ = 2048;
> >               *cs++ = 0;
> 
> What's this instruction? Add a define so it is self-documenting?

STORE_INDEX.

> > -     *cs = bbe;
> > +     *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
> > +     w->bb_duration = cs;
> > +     *cs++ = 0;
> > +     w->reloc[r].target_handle = self;
> > +     w->reloc[r].offset = offset_in_page(cs);
> > +     *cs++ = w->reloc[r].delta = 4000;
> > +     *cs++ = 0;
> > +     r++;
> > +
> > +     *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
> > +     w->reloc[r].target_handle = self;
> > +     w->reloc[r].offset = offset_in_page(cs);
> > +     *cs++ = w->reloc[r].delta = offset_in_page(jmp);
> 
> Presumably MI_MATH stuff relaxed the loop enough and we don't need any 
> extra noops?

More than enough.
 
> I would appreciate a banner style comment explaining the batch layout 
> mentioning the interesting offsets and high-level logic.

There's another variant out there with the block comments. More
copy'n'pasting to come.

> >   static void
> > -update_bb_start(struct w_step *w)
> > +update_bb_start(struct workload *wrk, struct w_step *w)
> 
> I think there is w->wrk if you find it easier but it's only one callsite 
> so it's probably even better like this.
> 
> >   {
> > -     if (!w->unbound_duration)
> > -             return;
> > +     uint32_t ticks;
> >   
> > -     gem_set_domain(fd, w->bb_handle,
> > -                    I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> > +     ticks = 0;
> > +     if (!w->unbound_duration)
> > +             ticks = ~ns_to_ctx_ticks(1000 * get_duration(wrk, w));
> 
> Hm inverted ticks, why? And since it is not obvious I think it deserves 
> a comment.

We do not get a choice in the MI_DO_COMPARE operation, so need to
convert the values to work with a less-than.

> > @@ -2812,6 +2640,9 @@ int main(int argc, char **argv)
> >               case 'F':
> >                       scale_time = atof(optarg);
> >                       break;
> > +             case 'n':
> > +                     /* ignored; using HW timers */
> > +                     break;
> 
> For what user? I deleted media-bench.pl but maybe you are using it locally?

Yeah, it was still in my scripts. There's no great need to keep it, I
just thought it was convenient to ignore.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Intel-gfx] [PATCH i-g-t] gem_wsim: Use CTX_TIMESTAMP for timed spinners
  2020-11-02 15:33 [Intel-gfx] [PATCH i-g-t] " Chris Wilson
@ 2020-11-02 17:14 ` Tvrtko Ursulin
  2020-11-02 18:13   ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Tvrtko Ursulin @ 2020-11-02 17:14 UTC (permalink / raw)
  To: Chris Wilson, igt-dev; +Cc: intel-gfx


On 02/11/2020 15:33, Chris Wilson wrote:
> Using MI_MATH and MI_COND_BBE, we can construct a loop that runs for a
> precise number of clock cycles, as measured by the CTX_TIMESTAMP. We use
> the CTX_TIMESTAMP (as opposed to the CS_TIMESTAMP) so that the elapsed
> time is measured local to the context, and the length of the batch is
> unaffected by preemption. Since the clock ticks at a known frequency, we
> can directly translate the batch durations into cycles and so remove the
> requirement for nop calibration, and the often excessively large nop
> batches.
> 
> The downside to this is that we need to use engine local registers, and
> before gen11 there is no support in the CS for relative mmio and so this
> technique does not support transparent load balancing on a virtual
> engine before Icelake.

I am enthusiastic, just that I don't have a local Gen11+ DUT but that's 
secondary.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   benchmarks/gem_wsim.c | 524 ++++++++++++++----------------------------
>   1 file changed, 169 insertions(+), 355 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index dbb46b9aa..5d67468d1 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -176,10 +176,9 @@ struct w_step
>   
>   	struct drm_i915_gem_execbuffer2 eb;
>   	struct drm_i915_gem_exec_object2 *obj;
> -	struct drm_i915_gem_relocation_entry reloc[1];
> -	unsigned long bb_sz;
> +	struct drm_i915_gem_relocation_entry reloc[3];
>   	uint32_t bb_handle;
> -	uint32_t *recursive_bb_start;
> +	uint32_t *bb_duration;
>   };
>   
>   struct ctx {
> @@ -227,10 +226,6 @@ struct workload
>   	unsigned int nrequest[NUM_ENGINES];
>   };
>   
> -static const unsigned int nop_calibration_us = 1000;
> -static bool has_nop_calibration = false;
> -static bool sequential = true;
> -
>   static unsigned int master_prng;
>   
>   static int verbose = 1;
> @@ -253,59 +248,67 @@ static const char *ring_str_map[NUM_ENGINES] = {
>   	[VECS] = "VECS",
>   };
>   
> -/* stores calibrations for particular engines */
> -static unsigned long engine_calib_map[NUM_ENGINES];
> -
> -static enum intel_engine_id
> -ci_to_engine_id(int class, int instance)
> -{
> -	static const struct {
> -		int class;
> -		int instance;
> -		unsigned int id;
> -	} map[] = {
> -		{ I915_ENGINE_CLASS_RENDER, 0, RCS },
> -		{ I915_ENGINE_CLASS_COPY, 0, BCS },
> -		{ I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
> -		{ I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
> -		{ I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
> -		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
> +static int read_timestamp_frequency(int i915)
> +{
> +	int value = 0;
> +	drm_i915_getparam_t gp = {
> +		.value = &value,
> +		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
>   	};
> -
> -	unsigned int i;
> -
> -	for (i = 0; i < ARRAY_SIZE(map); i++) {
> -		if (class == map[i].class && instance == map[i].instance)
> -			return map[i].id;
> -	}
> -	return -1;
> +	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
> +	return value;
>   }
>   
> -static void
> -apply_unset_calibrations(unsigned long raw_number)
> +static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
>   {
> -	for (int i = 0; i < NUM_ENGINES; i++)
> -		engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
> +	return (x + y - 1) / y;
>   }
>   
> -static void
> -print_engine_calibrations(void)
> +static uint64_t ns_to_ctx_ticks(uint64_t ns)
>   {
> -	bool first_entry = true;
> +	static long f;
>   
> -	printf("Nop calibration for %uus delay is: ", nop_calibration_us);
> -	for (int i = 0; i < NUM_ENGINES; i++) {
> -		/* skip engines not present and DEFAULT and VCS */
> -		if (i != DEFAULT && i != VCS && engine_calib_map[i]) {
> -			if (first_entry) {
> -				printf("%s=%lu", ring_str_map[i], engine_calib_map[i]);
> -				first_entry = false;
> -			} else {
> -				printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]);
> -			}
> -		}
> +	if (!f) {
> +		f = read_timestamp_frequency(fd);
> +		if (intel_gen(intel_get_drm_devid(fd)) == 11)
> +			f = 12500000; /* icl!!! are you feeling alright? */

What does the comment refer to?

Should there be an assert here if < gen11?

>   	}
> -	printf("\n");
> +
> +	return div64_u64_round_up(ns * f, NSEC_PER_SEC);
> +}
> +
> +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
> +
> +#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
> +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
> +/* Opcodes for MI_MATH_INSTR */
> +#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
> +#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
> +#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
> +#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
> +#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
> +#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
> +#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
> +#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
> +#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
> +#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
> +#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
> +#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
> +/* Registers used as operands in MI_MATH_INSTR */
> +#define   MI_MATH_REG(x)                (x)
> +#define   MI_MATH_REG_SRCA              0x20
> +#define   MI_MATH_REG_SRCB              0x21
> +#define   MI_MATH_REG_ACCU              0x31
> +#define   MI_MATH_REG_ZF                0x32
> +#define   MI_MATH_REG_CF                0x33
> +
> +#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
> +#define   MI_CS_MMIO_DST BIT(19)
> +#define   MI_CS_MMIO_SRC BIT(18)
> +
> +static unsigned int offset_in_page(void *addr)
> +{
> +	return (uintptr_t)addr & 4095;
>   }
>   
>   static void add_dep(struct deps *deps, struct dep_entry entry)
> @@ -1392,91 +1395,116 @@ __get_ctx(struct workload *wrk, const struct w_step *w)
>   	return &wrk->ctx_list[w->context];
>   }
>   
> -static unsigned long
> -__get_bb_sz(const struct w_step *w, unsigned int duration)
> -{
> -	enum intel_engine_id engine = w->engine;
> -	struct ctx *ctx = __get_ctx(w->wrk, w);
> -	unsigned long d;
> -
> -	if (ctx->engine_map && engine == DEFAULT)
> -		/* Assume first engine calibration. */
> -		engine = ctx->engine_map[0];
> -
> -	igt_assert(engine_calib_map[engine]);
> -	d = ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
> -		  nop_calibration_us,
> -		  sizeof(uint32_t));
> -
> -	return d;
> -}
> -
> -static unsigned long
> -get_bb_sz(const struct w_step *w, unsigned int duration)
> +static uint32_t mmio_base(int i915, enum intel_engine_id engine, int gen)
>   {
> -	unsigned long d = __get_bb_sz(w, duration);
> -
> -	igt_assert(d);
> +	const char *name;
>   
> -	return d;
> -}
> +	if (gen >= 11)
> +		return 0;
>   
> -static void init_bb(struct w_step *w)
> -{
> -	const unsigned int arb_period =
> -			__get_bb_sz(w, w->preempt_us) / sizeof(uint32_t);
> -	const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
> -	unsigned int i;
> -	uint32_t *ptr;
> +	switch (engine) {
> +	case NUM_ENGINES:
> +	default:
> +		return 0;
>   
> -	if (w->unbound_duration || !arb_period)
> -		return;
> +	case DEFAULT:
> +	case RCS:
> +		name = "rcs0";
> +		break;
>   
> -	gem_set_domain(fd, w->bb_handle,
> -		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> +	case BCS:
> +		name = "bcs0";
> +		break;
>   
> -	ptr = gem_mmap__wc(fd, w->bb_handle, 0, mmap_len, PROT_WRITE);
> +	case VCS:
> +	case VCS1:
> +		name = "vcs0";
> +		break;
> +	case VCS2:
> +		name = "vcs1";
> +		break;
>   
> -	for (i = arb_period; i < w->bb_sz / sizeof(uint32_t); i += arb_period)
> -		ptr[i] = 0x5 << 23; /* MI_ARB_CHK */
> +	case VECS:
> +		name = "vecs0";
> +		break;
> +	}
>   
> -	munmap(ptr, mmap_len);
> +	return gem_engine_mmio_base(i915, name);

Why is mmio base needed if relative addressing is used? Maybe I'll 
figure it out after reading further.

>   }
>   
> -static unsigned int terminate_bb(struct w_step *w)
> +static unsigned int create_bb(struct w_step *w, int self)
>   {
> -	const uint32_t bbe = 0xa << 23;
> -	unsigned long mmap_start, mmap_len;
> -	unsigned long batch_start = w->bb_sz;
> +	const int gen = intel_gen(intel_get_drm_devid(fd));
> +	const uint32_t base = mmio_base(fd, w->engine, gen);
> +#define CS_GPR(x) (base + 0x600 + 8 * (x))
> +#define TIMESTAMP (base + 0x3a8)
> +	const int use_64b = gen >= 8;
> +	enum { START_TS, NOW_TS };
> +	uint32_t *ptr, *cs, *jmp;
>   	unsigned int r = 0;
> -	uint32_t *ptr, *cs;
> -
> -	batch_start -= sizeof(uint32_t); /* bbend */
> -
> -	if (w->unbound_duration)
> -		batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
> -
> -	mmap_start = rounddown(batch_start, PAGE_SIZE);
> -	mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
>   
>   	gem_set_domain(fd, w->bb_handle,
>   		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
>   
> -	ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
> -	cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
> +	cs = ptr = gem_mmap__wc(fd, w->bb_handle, 0, 4096, PROT_WRITE);
>   
> -	if (w->unbound_duration) {
> -		w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
> -		batch_start += 4 * sizeof(uint32_t);
> +	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
> +	*cs++ = CS_GPR(START_TS) + 4;

What is "+ 4"?

> +	*cs++ = 0;
> +	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
> +	*cs++ = TIMESTAMP;
> +	*cs++ = CS_GPR(START_TS);
>   
> -		*cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
> -		w->recursive_bb_start = cs;
> -		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
> +	if (offset_in_page(cs) & 4)
>   		*cs++ = 0;
> +	jmp = cs;
> +
> +	if (w->preempt_us)
> +		*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
> +	*cs++ = CS_GPR(NOW_TS) + 4;
> +	*cs++ = 0;
> +	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
> +	*cs++ = TIMESTAMP;
> +	*cs++ = CS_GPR(NOW_TS);
> +
> +	*cs++ = MI_MATH(4);
> +	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
> +	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));

MI_MATH_REG is aliased to CS_GPR?

> +	*cs++ = MI_MATH_SUB;
> +	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
> +
> +	*cs++ = 0x24 << 23 | (1 + use_64b) | MI_CS_MMIO_DST; /* SRM */

All others have nice defines but SRM, any special reason?

> +	*cs++ = CS_GPR(NOW_TS);
> +	w->reloc[r].target_handle = self;
> +	w->reloc[r].offset = offset_in_page(cs);
> +	*cs++ = w->reloc[r].delta = 4000;
> +	*cs++ = 0;
> +	r++;
> +
> +	/* Delay between SRM and COND_BBE to post the writes */
> +	for (int n = 0; n < 8; n++) {
> +		*cs++ = MI_INSTR(0x21, 1);
> +		*cs++ = 2048;
>   		*cs++ = 0;

What's this instruction? Add a define so it is self-documenting?

>   	}
>   
> -	*cs = bbe;
> +	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
> +	w->bb_duration = cs;
> +	*cs++ = 0;
> +	w->reloc[r].target_handle = self;
> +	w->reloc[r].offset = offset_in_page(cs);
> +	*cs++ = w->reloc[r].delta = 4000;
> +	*cs++ = 0;
> +	r++;
> +
> +	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
> +	w->reloc[r].target_handle = self;
> +	w->reloc[r].offset = offset_in_page(cs);
> +	*cs++ = w->reloc[r].delta = offset_in_page(jmp);

Presumably MI_MATH stuff relaxed the loop enough and we don't need any 
extra noops?

I would appreciate a banner style comment explaining the batch layout 
mentioning the interesting offsets and high-level logic.

> +	*cs++ = 0;
> +	r++;
>   
>   	return r;
>   }
> @@ -1590,23 +1618,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
>   		igt_assert(j < nr_obj);
>   	}
>   
> -	if (w->unbound_duration)
> -		/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
> -		w->bb_sz = max(PAGE_SIZE, __get_bb_sz(w, w->preempt_us)) +
> -			   (1 + 3) * sizeof(uint32_t);
> -	else
> -		w->bb_sz = get_bb_sz(w, w->duration.max);
> -
> -	w->bb_handle = w->obj[j].handle =
> -		alloc_bo(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
> -	init_bb(w);
> -	w->obj[j].relocation_count = terminate_bb(w);
> -
> -	if (w->obj[j].relocation_count) {
> -		igt_assert(w->unbound_duration);
> -		w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
> -		w->reloc[0].target_handle = j;
> -	}
> +	w->bb_handle = w->obj[j].handle = gem_create(fd, 4096);
> +	w->obj[j].relocation_count = create_bb(w, j);
> +	igt_assert(w->obj[j].relocation_count <= ARRAY_SIZE(w->reloc));
> +	w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
>   
>   	w->eb.buffers_ptr = to_user_pointer(w->obj);
>   	w->eb.buffer_count = j + 1;
> @@ -1617,8 +1632,8 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
>   	printf("%u: %u:|", w->idx, w->eb.buffer_count);
>   	for (i = 0; i <= j; i++)
>   		printf("%x|", w->obj[i].handle);
> -	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
> -		w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
> +	printf(" flags=%llx bb=%x[%u] ctx[%u]=%u\n",
> +		w->eb.flags, w->bb_handle, j, w->context,
>   		get_ctxid(wrk, w));
>   #endif
>   }
> @@ -1803,7 +1818,7 @@ static void measure_active_set(struct workload *wrk)
>   		if (w->type != BATCH)
>   			continue;
>   
> -		batch_sizes += w->bb_sz;
> +		batch_sizes += 4096;
>   
>   		for (j = 0; j < w->data_deps.nr; j++) {
>   			struct dep_entry *dep = &w->data_deps.list[j];
> @@ -1904,6 +1919,10 @@ static int prepare_workload(unsigned int id, struct workload *wrk)
>   					wsim_err("Load balancing needs an engine map!\n");
>   					return 1;
>   				}
> +				if (intel_gen(intel_get_drm_devid(fd)) < 11) {
> +					wsim_err("Load balancing needs relative mmio support, gen11+!\n");
> +					return 1;
> +				}
>   				ctx->load_balance = w->load_balance;
>   			} else if (w->type == BOND) {
>   				if (!ctx->load_balance) {
> @@ -2163,15 +2182,15 @@ static int elapsed_us(const struct timespec *start, const struct timespec *end)
>   }
>   
>   static void
> -update_bb_start(struct w_step *w)
> +update_bb_start(struct workload *wrk, struct w_step *w)

I think there is w->wrk if you find it easier but it's only one callsite 
so it's probably even better like this.

>   {
> -	if (!w->unbound_duration)
> -		return;
> +	uint32_t ticks;
>   
> -	gem_set_domain(fd, w->bb_handle,
> -		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> +	ticks = 0;
> +	if (!w->unbound_duration)
> +		ticks = ~ns_to_ctx_ticks(1000 * get_duration(wrk, w));

Hm, inverted ticks, why? And since it is not obvious, I think it deserves
a comment.

>   
> -	*w->recursive_bb_start = MI_BATCH_BUFFER_START | (1 << 8) | 1;
> +	*w->bb_duration = ticks;
>   }
>   
>   static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
> @@ -2198,13 +2217,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine)
>   	unsigned int i;
>   
>   	eb_update_flags(wrk, w, engine);
> -	update_bb_start(w);
> -
> -	w->eb.batch_start_offset =
> -		w->unbound_duration ?
> -		0 :
> -		ALIGN(w->bb_sz - get_bb_sz(w, get_duration(wrk, w)),
> -		      2 * sizeof(uint32_t));
> +	update_bb_start(wrk, w);
>   
>   	for (i = 0; i < w->fence_deps.nr; i++) {
>   		int tgt = w->idx + w->fence_deps.list[i].target;
> @@ -2353,8 +2366,7 @@ static void *run_workload(void *data)
>   				igt_assert(wrk->steps[t_idx].type == BATCH);
>   				igt_assert(wrk->steps[t_idx].unbound_duration);
>   
> -				*wrk->steps[t_idx].recursive_bb_start =
> -					MI_BATCH_BUFFER_END;
> +				*wrk->steps[t_idx].bb_duration = 0xffffffff;
>   				__sync_synchronize();
>   				continue;
>   			} else if (w->type == SSEU) {
> @@ -2467,131 +2479,15 @@ static void fini_workload(struct workload *wrk)
>   	free(wrk);
>   }
>   
> -static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
> -{
> -	const uint32_t bbe = 0xa << 23;
> -	unsigned int loops = 17;
> -	unsigned int usecs = nop_calibration_us;
> -	struct drm_i915_gem_exec_object2 obj = {};
> -	struct drm_i915_gem_execbuffer2 eb = {
> -		.buffer_count = 1,
> -		.buffers_ptr = (uintptr_t)&obj,
> -		.flags = engine->flags
> -	};
> -	long size, last_size;
> -	struct timespec t_0, t_end;
> -
> -	clock_gettime(CLOCK_MONOTONIC, &t_0);
> -
> -	size = 256 * 1024;
> -	do {
> -		struct timespec t_start;
> -
> -		obj.handle = alloc_bo(fd, size);
> -		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
> -			  sizeof(bbe));
> -		gem_execbuf(fd, &eb);
> -		gem_sync(fd, obj.handle);
> -
> -		clock_gettime(CLOCK_MONOTONIC, &t_start);
> -		for (int loop = 0; loop < loops; loop++)
> -			gem_execbuf(fd, &eb);
> -		gem_sync(fd, obj.handle);
> -		clock_gettime(CLOCK_MONOTONIC, &t_end);
> -
> -		gem_close(fd, obj.handle);
> -
> -		last_size = size;
> -		size = loops * size / elapsed(&t_start, &t_end) / 1e6 * usecs;
> -		size = ALIGN(size, sizeof(uint32_t));
> -	} while (elapsed(&t_0, &t_end) < 5 ||
> -		 labs(size - last_size) > (size * tolerance_pct / 100));
> -
> -	return size / sizeof(uint32_t);
> -}
> -
> -static void
> -calibrate_sequentially(void)
> -{
> -	struct intel_execution_engine2 *engine;
> -	enum intel_engine_id eng_id;
> -
> -	__for_each_physical_engine(fd, engine) {
> -		eng_id = ci_to_engine_id(engine->class, engine->instance);
> -		igt_assert(eng_id >= 0);
> -		engine_calib_map[eng_id] = calibrate_nop(fd, engine);
> -	}
> -}
> -
> -struct thread_data {
> -	struct intel_execution_engine2 *eng;
> -	pthread_t thr;
> -	unsigned long calib;
> -};
> -
> -static void *
> -engine_calibration_thread(void *data)
> -{
> -	struct thread_data *thr_d = (struct thread_data *) data;
> -
> -	thr_d->calib = calibrate_nop(fd, thr_d->eng);
> -	return NULL;
> -}
> -
> -static void
> -calibrate_in_parallel(void)
> -{
> -	struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
> -	struct intel_execution_engine2 *engine;
> -	enum intel_engine_id id;
> -	int ret;
> -
> -	__for_each_physical_engine(fd, engine) {
> -		id = ci_to_engine_id(engine->class, engine->instance);
> -		thr_d[id].eng = engine;
> -		ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
> -		igt_assert_eq(ret, 0);
> -	}
> -
> -	__for_each_physical_engine(fd, engine) {
> -		id = ci_to_engine_id(engine->class, engine->instance);
> -		igt_assert(id >= 0);
> -
> -		ret = pthread_join(thr_d[id].thr, NULL);
> -		igt_assert_eq(ret, 0);
> -		engine_calib_map[id] = thr_d[id].calib;
> -	}
> -
> -	free(thr_d);
> -}
> -
> -static void
> -calibrate_engines(void)
> -{
> -	if (sequential)
> -		calibrate_sequentially();
> -	else
> -		calibrate_in_parallel();
> -}
> -
>   static void print_help(void)
>   {
>   	puts(
>   "Usage: gem_wsim [OPTIONS]\n"
>   "\n"
>   "Runs a simulated workload on the GPU.\n"
> -"When ran without arguments performs a GPU calibration result of which needs to\n"
> -"be provided when running the simulation in subsequent invocations.\n"
> -"\n"
>   "Options:\n"
>   "  -h                This text.\n"
>   "  -q                Be quiet - do not output anything to stdout.\n"
> -"  -n <n |           Nop calibration value - single value is set to all engines\n"
> -"  e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
> -"                    particular engines.\n"
> -"  -t <n>            Nop calibration tolerance percentage.\n"
> -"  -T                Disable sequential calibration and perform calibration in parallel.\n"
> -"                    Use when there is a difficulty obtaining calibration with the\n"
>   "                    default settings.\n"

One more line to snip here.

>   "  -I <n>            Initial randomness seed.\n"
>   "  -p <n>            Context priority to use for the following workload on the\n"
> @@ -2671,17 +2567,12 @@ int main(int argc, char **argv)
>   	int master_workload = -1;
>   	char *append_workload_arg = NULL;
>   	struct w_arg *w_args = NULL;
> -	unsigned int tolerance_pct = 1;
>   	int exitcode = EXIT_FAILURE;
>   	double scale_time = 1.0f;
>   	double scale_dur = 1.0f;
>   	int prio = 0;
>   	double t;
> -	int i, c;
> -	char *subopts, *value;
> -	int raw_number = 0;
> -	long calib_val;
> -	int eng;
> +	int i, c, ret;
>   
>   	/*
>   	 * Open the device via the low-level API so we can do the GPU quiesce
> @@ -2721,70 +2612,7 @@ int main(int argc, char **argv)
>   		case 'c':
>   			clients = strtol(optarg, NULL, 0);
>   			break;
> -		case 't':
> -			tolerance_pct = strtol(optarg, NULL, 0);
> -			break;
> -		case 'T':
> -			sequential = false;
> -			break;
> -
> -		case 'n':
> -			subopts = optarg;
> -			while (*subopts != '\0') {
> -				eng = getsubopt(&subopts, (char **)ring_str_map, &value);
> -				if (!value) {
> -					/* only engine name was given */
> -					wsim_err("Missing calibration value for '%s'!\n",
> -						ring_str_map[eng]);
> -					goto err;
> -				}
>   
> -				calib_val = atol(value);
> -
> -				if (eng >= 0 && eng < NUM_ENGINES) {
> -				/* engine name with some value were given */
> -
> -					if (eng == DEFAULT || eng == VCS) {
> -						wsim_err("'%s' not allowed in engine calibrations!\n",
> -							ring_str_map[eng]);
> -						goto err;
> -					} else if (calib_val <= 0) {
> -						wsim_err("Invalid calibration for engine '%s' - value "
> -						"is either non-positive or is not a number!\n",
> -							ring_str_map[eng]);
> -						goto err;
> -					} else if (engine_calib_map[eng]) {
> -						wsim_err("Invalid repeated calibration of '%s'!\n",
> -							ring_str_map[eng]);
> -						goto err;
> -					} else {
> -						engine_calib_map[eng] = calib_val;
> -						if (eng == RCS)
> -							engine_calib_map[DEFAULT] = calib_val;
> -						else if (eng == VCS1 || eng == VCS2)
> -							engine_calib_map[VCS] = calib_val;
> -						has_nop_calibration = true;
> -					}
> -				} else {
> -					/* raw number was given */
> -
> -					if (!calib_val) {
> -						wsim_err("Invalid engine or zero calibration!\n");
> -						goto err;
> -					} else if (calib_val < 0) {
> -						wsim_err("Invalid negative calibration!\n");
> -						goto err;
> -					} else if (raw_number) {
> -						wsim_err("Default engine calibration provided more than once!\n");
> -						goto err;
> -					} else {
> -						raw_number = calib_val;
> -						apply_unset_calibrations(raw_number);
> -						has_nop_calibration = true;
> -					}
> -				}
> -			}
> -			break;
>   		case 'r':
>   			repeat = strtol(optarg, NULL, 0);
>   			break;
> @@ -2812,6 +2640,9 @@ int main(int argc, char **argv)
>   		case 'F':
>   			scale_time = atof(optarg);
>   			break;
> +		case 'n':
> +			/* ignored; using HW timers */
> +			break;

For what user? I deleted media-bench.pl but maybe you are using it locally?

>   		case 'h':
>   			print_help();
>   			goto out;
> @@ -2820,19 +2651,6 @@ int main(int argc, char **argv)
>   		}
>   	}
>   
> -	if (!has_nop_calibration) {
> -		if (verbose > 1) {
> -			printf("Calibrating nop delays with %u%% tolerance...\n",
> -				tolerance_pct);
> -		}
> -
> -		calibrate_engines();
> -
> -		if (verbose)
> -			print_engine_calibrations();
> -		goto out;
> -	}
> -
>   	if (!nr_w_args) {
>   		wsim_err("No workload descriptor(s)!\n");
>   		goto err;
> @@ -2885,7 +2703,6 @@ int main(int argc, char **argv)
>   
>   	if (verbose > 1) {
>   		printf("Random seed is %u.\n", master_prng);
> -		print_engine_calibrations();
>   		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
>   	}
>   
> @@ -2916,16 +2733,13 @@ int main(int argc, char **argv)
>   	clock_gettime(CLOCK_MONOTONIC, &t_start);
>   
>   	for (i = 0; i < clients; i++) {
> -		int ret;
> -
>   		ret = pthread_create(&w[i]->thread, NULL, run_workload, w[i]);
>   		igt_assert_eq(ret, 0);
>   	}
>   
>   	if (master_workload >= 0) {
> -		int ret = pthread_join(w[master_workload]->thread, NULL);
> -
> -		igt_assert(ret == 0);
> +		ret = pthread_join(w[master_workload]->thread, NULL);
> +		igt_assert_eq(ret, 0);
>   
>   		for (i = 0; i < clients; i++)
>   			w[i]->run = false;
> @@ -2933,8 +2747,8 @@ int main(int argc, char **argv)
>   
>   	for (i = 0; i < clients; i++) {
>   		if (master_workload != i) {
> -			int ret = pthread_join(w[i]->thread, NULL);
> -			igt_assert(ret == 0);
> +			ret = pthread_join(w[i]->thread, NULL);
> +			igt_assert_eq(ret, 0);
>   		}
>   	}
>   
> 

Cool.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Intel-gfx] [PATCH i-g-t] gem_wsim: Use CTX_TIMESTAMP for timed spinners
@ 2020-11-02 15:33 Chris Wilson
  2020-11-02 17:14 ` Tvrtko Ursulin
  0 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2020-11-02 15:33 UTC (permalink / raw)
  To: igt-dev; +Cc: intel-gfx, Chris Wilson

Using MI_MATH and MI_COND_BBE, we can construct a loop that runs for a
precise number of clock cycles, as measured by the CTX_TIMESTAMP. We use
the CTX_TIMESTAMP (as opposed to the CS_TIMESTAMP) so that the elapsed
time is measured local to the context, and the length of the batch is
unaffected by preemption. Since the clock ticks at a known frequency, we
can directly translate the batch durations into cycles and so remove the
requirement for nop calibration, and the often excessively large nop
batches.

The downside to this is that we need to use engine local registers, and
before gen11 there is no support in the CS for relative mmio and so this
technique does not support transparent load balancing on a virtual
engine before Icelake.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 benchmarks/gem_wsim.c | 524 ++++++++++++++----------------------------
 1 file changed, 169 insertions(+), 355 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index dbb46b9aa..5d67468d1 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -176,10 +176,9 @@ struct w_step
 
 	struct drm_i915_gem_execbuffer2 eb;
 	struct drm_i915_gem_exec_object2 *obj;
-	struct drm_i915_gem_relocation_entry reloc[1];
-	unsigned long bb_sz;
+	struct drm_i915_gem_relocation_entry reloc[3];
 	uint32_t bb_handle;
-	uint32_t *recursive_bb_start;
+	uint32_t *bb_duration;
 };
 
 struct ctx {
@@ -227,10 +226,6 @@ struct workload
 	unsigned int nrequest[NUM_ENGINES];
 };
 
-static const unsigned int nop_calibration_us = 1000;
-static bool has_nop_calibration = false;
-static bool sequential = true;
-
 static unsigned int master_prng;
 
 static int verbose = 1;
@@ -253,59 +248,67 @@ static const char *ring_str_map[NUM_ENGINES] = {
 	[VECS] = "VECS",
 };
 
-/* stores calibrations for particular engines */
-static unsigned long engine_calib_map[NUM_ENGINES];
-
-static enum intel_engine_id
-ci_to_engine_id(int class, int instance)
-{
-	static const struct {
-		int class;
-		int instance;
-		unsigned int id;
-	} map[] = {
-		{ I915_ENGINE_CLASS_RENDER, 0, RCS },
-		{ I915_ENGINE_CLASS_COPY, 0, BCS },
-		{ I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
-		{ I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
-		{ I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
-		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
+static int read_timestamp_frequency(int i915)
+{
+	int value = 0;
+	drm_i915_getparam_t gp = {
+		.value = &value,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
 	};
-
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(map); i++) {
-		if (class == map[i].class && instance == map[i].instance)
-			return map[i].id;
-	}
-	return -1;
+	ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp);
+	return value;
 }
 
-static void
-apply_unset_calibrations(unsigned long raw_number)
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
 {
-	for (int i = 0; i < NUM_ENGINES; i++)
-		engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
+	return (x + y - 1) / y;
 }
 
-static void
-print_engine_calibrations(void)
+static uint64_t ns_to_ctx_ticks(uint64_t ns)
 {
-	bool first_entry = true;
+	static long f;
 
-	printf("Nop calibration for %uus delay is: ", nop_calibration_us);
-	for (int i = 0; i < NUM_ENGINES; i++) {
-		/* skip engines not present and DEFAULT and VCS */
-		if (i != DEFAULT && i != VCS && engine_calib_map[i]) {
-			if (first_entry) {
-				printf("%s=%lu", ring_str_map[i], engine_calib_map[i]);
-				first_entry = false;
-			} else {
-				printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]);
-			}
-		}
+	if (!f) {
+		f = read_timestamp_frequency(fd);
+		if (intel_gen(intel_get_drm_devid(fd)) == 11)
+			f = 12500000; /* icl!!! are you feeling alright? */
 	}
-	printf("\n");
+
+	return div64_u64_round_up(ns * f, NSEC_PER_SEC);
+}
+
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
+#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
+#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
+#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
+#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
+#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define   MI_MATH_REG(x)                (x)
+#define   MI_MATH_REG_SRCA              0x20
+#define   MI_MATH_REG_SRCB              0x21
+#define   MI_MATH_REG_ACCU              0x31
+#define   MI_MATH_REG_ZF                0x32
+#define   MI_MATH_REG_CF                0x33
+
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
+#define   MI_CS_MMIO_DST BIT(19)
+#define   MI_CS_MMIO_SRC BIT(18)
+
+static unsigned int offset_in_page(void *addr)
+{
+	return (uintptr_t)addr & 4095;
 }
 
 static void add_dep(struct deps *deps, struct dep_entry entry)
@@ -1392,91 +1395,116 @@ __get_ctx(struct workload *wrk, const struct w_step *w)
 	return &wrk->ctx_list[w->context];
 }
 
-static unsigned long
-__get_bb_sz(const struct w_step *w, unsigned int duration)
-{
-	enum intel_engine_id engine = w->engine;
-	struct ctx *ctx = __get_ctx(w->wrk, w);
-	unsigned long d;
-
-	if (ctx->engine_map && engine == DEFAULT)
-		/* Assume first engine calibration. */
-		engine = ctx->engine_map[0];
-
-	igt_assert(engine_calib_map[engine]);
-	d = ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
-		  nop_calibration_us,
-		  sizeof(uint32_t));
-
-	return d;
-}
-
-static unsigned long
-get_bb_sz(const struct w_step *w, unsigned int duration)
+static uint32_t mmio_base(int i915, enum intel_engine_id engine, int gen)
 {
-	unsigned long d = __get_bb_sz(w, duration);
-
-	igt_assert(d);
+	const char *name;
 
-	return d;
-}
+	if (gen >= 11)
+		return 0;
 
-static void init_bb(struct w_step *w)
-{
-	const unsigned int arb_period =
-			__get_bb_sz(w, w->preempt_us) / sizeof(uint32_t);
-	const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
-	unsigned int i;
-	uint32_t *ptr;
+	switch (engine) {
+	case NUM_ENGINES:
+	default:
+		return 0;
 
-	if (w->unbound_duration || !arb_period)
-		return;
+	case DEFAULT:
+	case RCS:
+		name = "rcs0";
+		break;
 
-	gem_set_domain(fd, w->bb_handle,
-		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+	case BCS:
+		name = "bcs0";
+		break;
 
-	ptr = gem_mmap__wc(fd, w->bb_handle, 0, mmap_len, PROT_WRITE);
+	case VCS:
+	case VCS1:
+		name = "vcs0";
+		break;
+	case VCS2:
+		name = "vcs1";
+		break;
 
-	for (i = arb_period; i < w->bb_sz / sizeof(uint32_t); i += arb_period)
-		ptr[i] = 0x5 << 23; /* MI_ARB_CHK */
+	case VECS:
+		name = "vecs0";
+		break;
+	}
 
-	munmap(ptr, mmap_len);
+	return gem_engine_mmio_base(i915, name);
 }
 
-static unsigned int terminate_bb(struct w_step *w)
+static unsigned int create_bb(struct w_step *w, int self)
 {
-	const uint32_t bbe = 0xa << 23;
-	unsigned long mmap_start, mmap_len;
-	unsigned long batch_start = w->bb_sz;
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const uint32_t base = mmio_base(fd, w->engine, gen);
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define TIMESTAMP (base + 0x3a8)
+	const int use_64b = gen >= 8;
+	enum { START_TS, NOW_TS };
+	uint32_t *ptr, *cs, *jmp;
 	unsigned int r = 0;
-	uint32_t *ptr, *cs;
-
-	batch_start -= sizeof(uint32_t); /* bbend */
-
-	if (w->unbound_duration)
-		batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
-
-	mmap_start = rounddown(batch_start, PAGE_SIZE);
-	mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
 
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
-	cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
+	cs = ptr = gem_mmap__wc(fd, w->bb_handle, 0, 4096, PROT_WRITE);
 
-	if (w->unbound_duration) {
-		w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
-		batch_start += 4 * sizeof(uint32_t);
+	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
+	*cs++ = CS_GPR(START_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(START_TS);
 
-		*cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
-		w->recursive_bb_start = cs;
-		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	if (offset_in_page(cs) & 4)
 		*cs++ = 0;
+	jmp = cs;
+
+	if (w->preempt_us)
+		*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
+	*cs++ = CS_GPR(NOW_TS) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
+	*cs++ = TIMESTAMP;
+	*cs++ = CS_GPR(NOW_TS);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+	*cs++ = MI_MATH_SUB;
+	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+	*cs++ = 0x24 << 23 | (1 + use_64b) | MI_CS_MMIO_DST; /* SRM */
+	*cs++ = CS_GPR(NOW_TS);
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = 4000;
+	*cs++ = 0;
+	r++;
+
+	/* Delay between SRM and COND_BBE to post the writes */
+	for (int n = 0; n < 8; n++) {
+		*cs++ = MI_INSTR(0x21, 1);
+		*cs++ = 2048;
 		*cs++ = 0;
 	}
 
-	*cs = bbe;
+	*cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+	w->bb_duration = cs;
+	*cs++ = 0;
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = 4000;
+	*cs++ = 0;
+	r++;
+
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+	w->reloc[r].target_handle = self;
+	w->reloc[r].offset = offset_in_page(cs);
+	*cs++ = w->reloc[r].delta = offset_in_page(jmp);
+	*cs++ = 0;
+	r++;
 
 	return r;
 }
@@ -1590,23 +1618,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
 		igt_assert(j < nr_obj);
 	}
 
-	if (w->unbound_duration)
-		/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
-		w->bb_sz = max(PAGE_SIZE, __get_bb_sz(w, w->preempt_us)) +
-			   (1 + 3) * sizeof(uint32_t);
-	else
-		w->bb_sz = get_bb_sz(w, w->duration.max);
-
-	w->bb_handle = w->obj[j].handle =
-		alloc_bo(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
-	init_bb(w);
-	w->obj[j].relocation_count = terminate_bb(w);
-
-	if (w->obj[j].relocation_count) {
-		igt_assert(w->unbound_duration);
-		w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
-		w->reloc[0].target_handle = j;
-	}
+	w->bb_handle = w->obj[j].handle = gem_create(fd, 4096);
+	w->obj[j].relocation_count = create_bb(w, j);
+	igt_assert(w->obj[j].relocation_count <= ARRAY_SIZE(w->reloc));
+	w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
 
 	w->eb.buffers_ptr = to_user_pointer(w->obj);
 	w->eb.buffer_count = j + 1;
@@ -1617,8 +1632,8 @@ alloc_step_batch(struct workload *wrk, struct w_step *w)
 	printf("%u: %u:|", w->idx, w->eb.buffer_count);
 	for (i = 0; i <= j; i++)
 		printf("%x|", w->obj[i].handle);
-	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
-		w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
+	printf(" flags=%llx bb=%x[%u] ctx[%u]=%u\n",
+		w->eb.flags, w->bb_handle, j, w->context,
 		get_ctxid(wrk, w));
 #endif
 }
@@ -1803,7 +1818,7 @@ static void measure_active_set(struct workload *wrk)
 		if (w->type != BATCH)
 			continue;
 
-		batch_sizes += w->bb_sz;
+		batch_sizes += 4096;
 
 		for (j = 0; j < w->data_deps.nr; j++) {
 			struct dep_entry *dep = &w->data_deps.list[j];
@@ -1904,6 +1919,10 @@ static int prepare_workload(unsigned int id, struct workload *wrk)
 					wsim_err("Load balancing needs an engine map!\n");
 					return 1;
 				}
+				if (intel_gen(intel_get_drm_devid(fd)) < 11) {
+					wsim_err("Load balancing needs relative mmio support, gen11+!\n");
+					return 1;
+				}
 				ctx->load_balance = w->load_balance;
 			} else if (w->type == BOND) {
 				if (!ctx->load_balance) {
@@ -2163,15 +2182,15 @@ static int elapsed_us(const struct timespec *start, const struct timespec *end)
 }
 
 static void
-update_bb_start(struct w_step *w)
+update_bb_start(struct workload *wrk, struct w_step *w)
 {
-	if (!w->unbound_duration)
-		return;
+	uint32_t ticks;
 
-	gem_set_domain(fd, w->bb_handle,
-		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+	ticks = 0;
+	if (!w->unbound_duration)
+		ticks = ~ns_to_ctx_ticks(1000 * get_duration(wrk, w));
 
-	*w->recursive_bb_start = MI_BATCH_BUFFER_START | (1 << 8) | 1;
+	*w->bb_duration = ticks;
 }
 
 static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
@@ -2198,13 +2217,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine)
 	unsigned int i;
 
 	eb_update_flags(wrk, w, engine);
-	update_bb_start(w);
-
-	w->eb.batch_start_offset =
-		w->unbound_duration ?
-		0 :
-		ALIGN(w->bb_sz - get_bb_sz(w, get_duration(wrk, w)),
-		      2 * sizeof(uint32_t));
+	update_bb_start(wrk, w);
 
 	for (i = 0; i < w->fence_deps.nr; i++) {
 		int tgt = w->idx + w->fence_deps.list[i].target;
@@ -2353,8 +2366,7 @@ static void *run_workload(void *data)
 				igt_assert(wrk->steps[t_idx].type == BATCH);
 				igt_assert(wrk->steps[t_idx].unbound_duration);
 
-				*wrk->steps[t_idx].recursive_bb_start =
-					MI_BATCH_BUFFER_END;
+				*wrk->steps[t_idx].bb_duration = 0xffffffff;
 				__sync_synchronize();
 				continue;
 			} else if (w->type == SSEU) {
@@ -2467,131 +2479,15 @@ static void fini_workload(struct workload *wrk)
 	free(wrk);
 }
 
-static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
-{
-	const uint32_t bbe = 0xa << 23;
-	unsigned int loops = 17;
-	unsigned int usecs = nop_calibration_us;
-	struct drm_i915_gem_exec_object2 obj = {};
-	struct drm_i915_gem_execbuffer2 eb = {
-		.buffer_count = 1,
-		.buffers_ptr = (uintptr_t)&obj,
-		.flags = engine->flags
-	};
-	long size, last_size;
-	struct timespec t_0, t_end;
-
-	clock_gettime(CLOCK_MONOTONIC, &t_0);
-
-	size = 256 * 1024;
-	do {
-		struct timespec t_start;
-
-		obj.handle = alloc_bo(fd, size);
-		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe,
-			  sizeof(bbe));
-		gem_execbuf(fd, &eb);
-		gem_sync(fd, obj.handle);
-
-		clock_gettime(CLOCK_MONOTONIC, &t_start);
-		for (int loop = 0; loop < loops; loop++)
-			gem_execbuf(fd, &eb);
-		gem_sync(fd, obj.handle);
-		clock_gettime(CLOCK_MONOTONIC, &t_end);
-
-		gem_close(fd, obj.handle);
-
-		last_size = size;
-		size = loops * size / elapsed(&t_start, &t_end) / 1e6 * usecs;
-		size = ALIGN(size, sizeof(uint32_t));
-	} while (elapsed(&t_0, &t_end) < 5 ||
-		 labs(size - last_size) > (size * tolerance_pct / 100));
-
-	return size / sizeof(uint32_t);
-}
-
-static void
-calibrate_sequentially(void)
-{
-	struct intel_execution_engine2 *engine;
-	enum intel_engine_id eng_id;
-
-	__for_each_physical_engine(fd, engine) {
-		eng_id = ci_to_engine_id(engine->class, engine->instance);
-		igt_assert(eng_id >= 0);
-		engine_calib_map[eng_id] = calibrate_nop(fd, engine);
-	}
-}
-
-struct thread_data {
-	struct intel_execution_engine2 *eng;
-	pthread_t thr;
-	unsigned long calib;
-};
-
-static void *
-engine_calibration_thread(void *data)
-{
-	struct thread_data *thr_d = (struct thread_data *) data;
-
-	thr_d->calib = calibrate_nop(fd, thr_d->eng);
-	return NULL;
-}
-
-static void
-calibrate_in_parallel(void)
-{
-	struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
-	struct intel_execution_engine2 *engine;
-	enum intel_engine_id id;
-	int ret;
-
-	__for_each_physical_engine(fd, engine) {
-		id = ci_to_engine_id(engine->class, engine->instance);
-		thr_d[id].eng = engine;
-		ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
-		igt_assert_eq(ret, 0);
-	}
-
-	__for_each_physical_engine(fd, engine) {
-		id = ci_to_engine_id(engine->class, engine->instance);
-		igt_assert(id >= 0);
-
-		ret = pthread_join(thr_d[id].thr, NULL);
-		igt_assert_eq(ret, 0);
-		engine_calib_map[id] = thr_d[id].calib;
-	}
-
-	free(thr_d);
-}
-
-static void
-calibrate_engines(void)
-{
-	if (sequential)
-		calibrate_sequentially();
-	else
-		calibrate_in_parallel();
-}
-
 static void print_help(void)
 {
 	puts(
 "Usage: gem_wsim [OPTIONS]\n"
 "\n"
 "Runs a simulated workload on the GPU.\n"
-"When ran without arguments performs a GPU calibration result of which needs to\n"
-"be provided when running the simulation in subsequent invocations.\n"
-"\n"
 "Options:\n"
 "  -h                This text.\n"
 "  -q                Be quiet - do not output anything to stdout.\n"
-"  -n <n |           Nop calibration value - single value is set to all engines\n"
-"  e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
-"                    particular engines.\n"
-"  -t <n>            Nop calibration tolerance percentage.\n"
-"  -T                Disable sequential calibration and perform calibration in parallel.\n"
-"                    Use when there is a difficulty obtaining calibration with the\n"
 "                    default settings.\n"
 "  -I <n>            Initial randomness seed.\n"
 "  -p <n>            Context priority to use for the following workload on the\n"
@@ -2671,17 +2567,12 @@ int main(int argc, char **argv)
 	int master_workload = -1;
 	char *append_workload_arg = NULL;
 	struct w_arg *w_args = NULL;
-	unsigned int tolerance_pct = 1;
 	int exitcode = EXIT_FAILURE;
 	double scale_time = 1.0f;
 	double scale_dur = 1.0f;
 	int prio = 0;
 	double t;
-	int i, c;
-	char *subopts, *value;
-	int raw_number = 0;
-	long calib_val;
-	int eng;
+	int i, c, ret;
 
 	/*
 	 * Open the device via the low-level API so we can do the GPU quiesce
@@ -2721,70 +2612,7 @@ int main(int argc, char **argv)
 		case 'c':
 			clients = strtol(optarg, NULL, 0);
 			break;
-		case 't':
-			tolerance_pct = strtol(optarg, NULL, 0);
-			break;
-		case 'T':
-			sequential = false;
-			break;
-
-		case 'n':
-			subopts = optarg;
-			while (*subopts != '\0') {
-				eng = getsubopt(&subopts, (char **)ring_str_map, &value);
-				if (!value) {
-					/* only engine name was given */
-					wsim_err("Missing calibration value for '%s'!\n",
-						ring_str_map[eng]);
-					goto err;
-				}
 
-				calib_val = atol(value);
-
-				if (eng >= 0 && eng < NUM_ENGINES) {
-				/* engine name with some value were given */
-
-					if (eng == DEFAULT || eng == VCS) {
-						wsim_err("'%s' not allowed in engine calibrations!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else if (calib_val <= 0) {
-						wsim_err("Invalid calibration for engine '%s' - value "
-						"is either non-positive or is not a number!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else if (engine_calib_map[eng]) {
-						wsim_err("Invalid repeated calibration of '%s'!\n",
-							ring_str_map[eng]);
-						goto err;
-					} else {
-						engine_calib_map[eng] = calib_val;
-						if (eng == RCS)
-							engine_calib_map[DEFAULT] = calib_val;
-						else if (eng == VCS1 || eng == VCS2)
-							engine_calib_map[VCS] = calib_val;
-						has_nop_calibration = true;
-					}
-				} else {
-					/* raw number was given */
-
-					if (!calib_val) {
-						wsim_err("Invalid engine or zero calibration!\n");
-						goto err;
-					} else if (calib_val < 0) {
-						wsim_err("Invalid negative calibration!\n");
-						goto err;
-					} else if (raw_number) {
-						wsim_err("Default engine calibration provided more than once!\n");
-						goto err;
-					} else {
-						raw_number = calib_val;
-						apply_unset_calibrations(raw_number);
-						has_nop_calibration = true;
-					}
-				}
-			}
-			break;
 		case 'r':
 			repeat = strtol(optarg, NULL, 0);
 			break;
@@ -2812,6 +2640,9 @@ int main(int argc, char **argv)
 		case 'F':
 			scale_time = atof(optarg);
 			break;
+		case 'n':
+			/* ignored; using HW timers */
+			break;
 		case 'h':
 			print_help();
 			goto out;
@@ -2820,19 +2651,6 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (!has_nop_calibration) {
-		if (verbose > 1) {
-			printf("Calibrating nop delays with %u%% tolerance...\n",
-				tolerance_pct);
-		}
-
-		calibrate_engines();
-
-		if (verbose)
-			print_engine_calibrations();
-		goto out;
-	}
-
 	if (!nr_w_args) {
 		wsim_err("No workload descriptor(s)!\n");
 		goto err;
@@ -2885,7 +2703,6 @@ int main(int argc, char **argv)
 
 	if (verbose > 1) {
 		printf("Random seed is %u.\n", master_prng);
-		print_engine_calibrations();
 		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
 	}
 
@@ -2916,16 +2733,13 @@ int main(int argc, char **argv)
 	clock_gettime(CLOCK_MONOTONIC, &t_start);
 
 	for (i = 0; i < clients; i++) {
-		int ret;
-
 		ret = pthread_create(&w[i]->thread, NULL, run_workload, w[i]);
 		igt_assert_eq(ret, 0);
 	}
 
 	if (master_workload >= 0) {
-		int ret = pthread_join(w[master_workload]->thread, NULL);
-
-		igt_assert(ret == 0);
+		ret = pthread_join(w[master_workload]->thread, NULL);
+		igt_assert_eq(ret, 0);
 
 		for (i = 0; i < clients; i++)
 			w[i]->run = false;
@@ -2933,8 +2747,8 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < clients; i++) {
 		if (master_workload != i) {
-			int ret = pthread_join(w[i]->thread, NULL);
-			igt_assert(ret == 0);
+			ret = pthread_join(w[i]->thread, NULL);
+			igt_assert_eq(ret, 0);
 		}
 	}
 
-- 
2.29.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-11-02 18:13 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-14 12:36 [Intel-gfx] [PATCH i-g-t] gem_wsim: Use CTX_TIMESTAMP for timed spinners Chris Wilson
2020-07-14 12:36 ` [igt-dev] " Chris Wilson
2020-07-14 13:09 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2020-07-14 16:05 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2020-11-02 15:33 [Intel-gfx] [PATCH i-g-t] " Chris Wilson
2020-11-02 17:14 ` Tvrtko Ursulin
2020-11-02 18:13   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.