* [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
@ 2020-03-11 10:49 Chris Wilson
  2020-03-12  9:18 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/selftests: Add request throughput measurement to perf (rev2) Patchwork
  2020-03-12 13:34 ` [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf Tvrtko Ursulin
  0 siblings, 2 replies; 10+ messages in thread
From: Chris Wilson @ 2020-03-11 10:49 UTC (permalink / raw)
  To: intel-gfx

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 589 +++++++++++++++++-
 2 files changed, 589 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index f89d9c42f1fa..274b9462a7c9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%pS", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1470,3 +1471,589 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+struct perf_stats {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+struct perf_series {
+	struct drm_i915_private *i915;
+	unsigned int nengines;
+	struct intel_context *ce[];
+};
+
+static int s_sync0(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+static int s_sync1(void *arg)
+{
+	struct perf_series *ps = arg;
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+
+	return err;
+}
+
+static int s_many(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_add(rq);
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			goto out;
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(stats);
+	return err;
+}
+
+static int p_sync0(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_sync1(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct i915_request *prev = NULL;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_many(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1


* [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/selftests: Add request throughput measurement to perf (rev2)
  2020-03-11 10:49 [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
@ 2020-03-12  9:18 ` Patchwork
  2020-03-12 13:34 ` [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf Tvrtko Ursulin
  1 sibling, 0 replies; 10+ messages in thread
From: Patchwork @ 2020-03-12  9:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/selftests: Add request throughput measurement to perf (rev2)
URL   : https://patchwork.freedesktop.org/series/73930/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8120_full -> Patchwork_16926_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes introduced with Patchwork_16926_full need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_16926_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_16926_full:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@execlists:
    - shard-skl:          [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl4/igt@i915_selftest@live@execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl9/igt@i915_selftest@live@execlists.html

  
New tests
---------

  New tests have been introduced between CI_DRM_8120_full and Patchwork_16926_full:

### New IGT tests (1) ###

  * igt@i915_selftest@perf@request:
    - Statuses : 6 pass(s)
    - Exec time: [3.48, 5.68] s

  

Known issues
------------

  Here are the changes found in Patchwork_16926_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@vcs1-dirty-create:
    - shard-iclb:         [PASS][3] -> [SKIP][4] ([fdo#112080]) +17 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb4/igt@gem_ctx_isolation@vcs1-dirty-create.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb5/igt@gem_ctx_isolation@vcs1-dirty-create.html

  * igt@gem_exec_async@concurrent-writes-blt:
    - shard-tglb:         [PASS][5] -> [FAIL][6] ([i915#1331])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-tglb8/igt@gem_exec_async@concurrent-writes-blt.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-tglb3/igt@gem_exec_async@concurrent-writes-blt.html

  * igt@gem_exec_schedule@implicit-read-write-bsd1:
    - shard-iclb:         [PASS][7] -> [SKIP][8] ([fdo#109276] / [i915#677]) +4 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb4/igt@gem_exec_schedule@implicit-read-write-bsd1.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb5/igt@gem_exec_schedule@implicit-read-write-bsd1.html

  * igt@gem_exec_schedule@in-order-bsd:
    - shard-iclb:         [PASS][9] -> [SKIP][10] ([fdo#112146]) +5 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb7/igt@gem_exec_schedule@in-order-bsd.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb1/igt@gem_exec_schedule@in-order-bsd.html

  * igt@gem_exec_schedule@pi-distinct-iova-bsd:
    - shard-iclb:         [PASS][11] -> [SKIP][12] ([i915#677]) +2 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb7/igt@gem_exec_schedule@pi-distinct-iova-bsd.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb2/igt@gem_exec_schedule@pi-distinct-iova-bsd.html

  * igt@gem_exec_suspend@basic-s3:
    - shard-kbl:          [PASS][13] -> [DMESG-WARN][14] ([i915#180]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl2/igt@gem_exec_suspend@basic-s3.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-kbl7/igt@gem_exec_suspend@basic-s3.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [PASS][15] -> [SKIP][16] ([fdo#109349])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb5/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-ytiled:
    - shard-skl:          [PASS][17] -> [FAIL][18] ([i915#52] / [i915#54])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-ytiled.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl1/igt@kms_draw_crc@draw-method-xrgb8888-mmap-cpu-ytiled.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-apl:          [PASS][19] -> [DMESG-WARN][20] ([i915#180]) +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-apl1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_flip@plain-flip-fb-recreate:
    - shard-skl:          [PASS][21] -> [FAIL][22] ([i915#34])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl2/igt@kms_flip@plain-flip-fb-recreate.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl5/igt@kms_flip@plain-flip-fb-recreate.html

  * igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-mmap-gtt:
    - shard-skl:          [PASS][23] -> [FAIL][24] ([i915#49])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl1/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-mmap-gtt.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl1/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-shrfb-draw-mmap-gtt.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min:
    - shard-skl:          [PASS][25] -> [FAIL][26] ([fdo#108145])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl1/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl1/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][27] -> [FAIL][28] ([fdo#108145] / [i915#265])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl7/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl8/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][29] -> [SKIP][30] ([fdo#109441]) +2 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb6/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_psr@suspend:
    - shard-skl:          [PASS][31] -> [INCOMPLETE][32] ([i915#198])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl8/igt@kms_psr@suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl7/igt@kms_psr@suspend.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][33] -> [FAIL][34] ([i915#31])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl7/igt@kms_setmode@basic.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-kbl6/igt@kms_setmode@basic.html

  * igt@prime_busy@hang-bsd2:
    - shard-iclb:         [PASS][35] -> [SKIP][36] ([fdo#109276]) +19 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb1/igt@prime_busy@hang-bsd2.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb6/igt@prime_busy@hang-bsd2.html

  
#### Possible fixes ####

  * igt@gem_busy@busy-vcs1:
    - shard-iclb:         [SKIP][37] ([fdo#112080]) -> [PASS][38] +9 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb3/igt@gem_busy@busy-vcs1.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb2/igt@gem_busy@busy-vcs1.html

  * igt@gem_ctx_persistence@close-replace-race:
    - shard-kbl:          [INCOMPLETE][39] ([i915#1402]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl7/igt@gem_ctx_persistence@close-replace-race.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-kbl3/igt@gem_ctx_persistence@close-replace-race.html

  * igt@gem_exec_balancer@smoke:
    - shard-iclb:         [SKIP][41] ([fdo#110854]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb3/igt@gem_exec_balancer@smoke.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb2/igt@gem_exec_balancer@smoke.html

  * igt@gem_exec_schedule@implicit-write-read-bsd2:
    - shard-iclb:         [SKIP][43] ([fdo#109276] / [i915#677]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb5/igt@gem_exec_schedule@implicit-write-read-bsd2.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb2/igt@gem_exec_schedule@implicit-write-read-bsd2.html

  * igt@gem_exec_schedule@out-order-bsd2:
    - shard-iclb:         [SKIP][45] ([fdo#109276]) -> [PASS][46] +12 similar issues
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb5/igt@gem_exec_schedule@out-order-bsd2.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb2/igt@gem_exec_schedule@out-order-bsd2.html

  * igt@gem_exec_schedule@pi-shared-iova-bsd:
    - shard-iclb:         [SKIP][47] ([i915#677]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb4/igt@gem_exec_schedule@pi-shared-iova-bsd.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb5/igt@gem_exec_schedule@pi-shared-iova-bsd.html

  * igt@gem_exec_schedule@preempt-other-chain-bsd:
    - shard-iclb:         [SKIP][49] ([fdo#112146]) -> [PASS][50] +6 similar issues
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb2/igt@gem_exec_schedule@preempt-other-chain-bsd.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb5/igt@gem_exec_schedule@preempt-other-chain-bsd.html

  * igt@gem_partial_pwrite_pread@writes-after-reads-display:
    - shard-iclb:         [TIMEOUT][51] -> [PASS][52]
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb2/igt@gem_partial_pwrite_pread@writes-after-reads-display.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb5/igt@gem_partial_pwrite_pread@writes-after-reads-display.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][53] ([i915#180]) -> [PASS][54] +3 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl1/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-kbl4/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
    - shard-glk:          [FAIL][55] ([i915#79]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-glk6/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-glk5/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-hsw:          [INCOMPLETE][57] ([i915#61]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-hsw2/igt@kms_flip@flip-vs-suspend-interruptible.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-hsw6/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_flip@plain-flip-fb-recreate-interruptible:
    - shard-apl:          [FAIL][59] ([i915#34]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-apl7/igt@kms_flip@plain-flip-fb-recreate-interruptible.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-apl3/igt@kms_flip@plain-flip-fb-recreate-interruptible.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-skl:          [INCOMPLETE][61] ([i915#69]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl8/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [FAIL][63] ([fdo#108145] / [i915#265]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-skl10/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-skl3/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-glk:          [FAIL][65] ([i915#899]) -> [PASS][66]
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-glk6/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-glk5/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr@psr2_cursor_blt:
    - shard-iclb:         [SKIP][67] ([fdo#109441]) -> [PASS][68] +1 similar issue
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb3/igt@kms_psr@psr2_cursor_blt.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb2/igt@kms_psr@psr2_cursor_blt.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [DMESG-WARN][69] ([i915#180]) -> [PASS][70] +2 similar issues
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-apl4/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-apl3/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  
#### Warnings ####

  * igt@gem_ctx_persistence@close-replace-race:
    - shard-iclb:         [INCOMPLETE][71] ([i915#1402]) -> [TIMEOUT][72] ([i915#1340])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-iclb8/igt@gem_ctx_persistence@close-replace-race.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-iclb4/igt@gem_ctx_persistence@close-replace-race.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][73] ([i915#454]) -> [SKIP][74] ([i915#468])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-tglb1/igt@i915_pm_dc@dc6-psr.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-tglb2/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rpm@debugfs-forcewake-user:
    - shard-snb:          [SKIP][75] ([fdo#109271]) -> [INCOMPLETE][76] ([i915#82])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-snb5/igt@i915_pm_rpm@debugfs-forcewake-user.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-snb6/igt@i915_pm_rpm@debugfs-forcewake-user.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [INCOMPLETE][77] -> [DMESG-WARN][78] ([i915#180])
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl1/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-kbl7/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][79], [FAIL][80]) ([i915#1389] / [i915#1402] / [i915#92]) -> [FAIL][81] ([i915#92])
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl7/igt@runner@aborted.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8120/shard-kbl7/igt@runner@aborted.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/shard-kbl2/igt@runner@aborted.html

  
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
  [fdo#112080]: https://bugs.freedesktop.org/show_bug.cgi?id=112080
  [fdo#112146]: https://bugs.freedesktop.org/show_bug.cgi?id=112146
  [i915#1331]: https://gitlab.freedesktop.org/drm/intel/issues/1331
  [i915#1340]: https://gitlab.freedesktop.org/drm/intel/issues/1340
  [i915#1389]: https://gitlab.freedesktop.org/drm/intel/issues/1389
  [i915#1402]: https://gitlab.freedesktop.org/drm/intel/issues/1402
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
  [i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#61]: https://gitlab.freedesktop.org/drm/intel/issues/61
  [i915#677]: https://gitlab.freedesktop.org/drm/intel/issues/677
  [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
  [i915#899]: https://gitlab.freedesktop.org/drm/intel/issues/899
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8120 -> Patchwork_16926

  CI-20190529: 20190529
  CI_DRM_8120: ce66c439df71f01b018803664c4a50fc61255788 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5505: 8973d811f3fdfb4ace4aabab2095ce0309881648 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16926: 773240d23fbd5bb95a8089fe54aa6ecc42d234e7 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16926/index.html

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-03-11 10:49 [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
  2020-03-12  9:18 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915/selftests: Add request throughput measurement to perf (rev2) Patchwork
@ 2020-03-12 13:34 ` Tvrtko Ursulin
  1 sibling, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2020-03-12 13:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/03/2020 10:49, Chris Wilson wrote:
> Under ideal circumstances, the driver should be able to keep the GPU
> fully saturated with work. Measure how close to ideal we get under the
> harshest of conditions with no user payload.
> 
> v2: Also measure throughput using only one thread.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
>   drivers/gpu/drm/i915/selftests/i915_request.c | 589 +++++++++++++++++-
>   2 files changed, 589 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> index 3bf7f53e9924..d8da142985eb 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> @@ -16,5 +16,6 @@
>    * Tests are executed in order by igt/i915_selftest
>    */
>   selftest(engine_cs, intel_engine_cs_perf_selftests)
> +selftest(request, i915_request_perf_selftests)
>   selftest(blt, i915_gem_object_blt_perf_selftests)
>   selftest(region, intel_memory_region_perf_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index f89d9c42f1fa..274b9462a7c9 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/prime_numbers.h>
> +#include <linux/pm_qos.h>
>   
>   #include "gem/i915_gem_pm.h"
>   #include "gem/selftests/mock_context.h"
> @@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg)
>   		struct igt_live_test t;
>   		unsigned int idx;
>   
> -		snprintf(name, sizeof(name), "%pS", fn);
> +		snprintf(name, sizeof(name), "%ps", *fn);
>   		err = igt_live_test_begin(&t, i915, __func__, name);
>   		if (err)
>   			break;
> @@ -1470,3 +1471,589 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
>   
>   	return i915_subtests(tests, i915);
>   }
> +
> +static int switch_to_kernel_sync(struct intel_context *ce, int err)
> +{
> +	struct i915_request *rq;
> +	struct dma_fence *fence;
> +
> +	rq = intel_engine_create_kernel_request(ce->engine);
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
> +
> +	fence = i915_active_fence_get(&ce->timeline->last_request);
> +	if (fence) {
> +		i915_request_await_dma_fence(rq, fence);
> +		dma_fence_put(fence);
> +	}
> +
> +	rq = i915_request_get(rq);
> +	i915_request_add(rq);
> +	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
> +		err = -ETIME;
> +	i915_request_put(rq);
> +
> +	while (!err && !intel_engine_is_idle(ce->engine))
> +		intel_engine_flush_submission(ce->engine);
> +
> +	return err;
> +}
> +
> +struct perf_stats {
> +	struct intel_engine_cs *engine;
> +	unsigned long count;
> +	ktime_t time;
> +	ktime_t busy;
> +	u64 runtime;
> +};
> +
> +struct perf_series {
> +	struct drm_i915_private *i915;
> +	unsigned int nengines;
> +	struct intel_context *ce[];
> +};
> +
> +static int s_sync0(void *arg)
> +{
> +	struct perf_series *ps = arg;
> +	IGT_TIMEOUT(end_time);
> +	unsigned int idx = 0;
> +	int err = 0;
> +
> +	GEM_BUG_ON(!ps->nengines);
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ps->ce[idx]);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0)
> +			err = -ETIME;
> +		i915_request_put(rq);
> +		if (err)
> +			break;
> +
> +		if (++idx == ps->nengines)
> +			idx = 0;
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	return err;
> +}
> +
> +static int s_sync1(void *arg)
> +{
> +	struct perf_series *ps = arg;
> +	struct i915_request *prev = NULL;
> +	IGT_TIMEOUT(end_time);
> +	unsigned int idx = 0;
> +	int err = 0;
> +
> +	GEM_BUG_ON(!ps->nengines);
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ps->ce[idx]);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +
> +		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
> +			err = -ETIME;
> +		i915_request_put(prev);
> +		prev = rq;
> +		if (err)
> +			break;
> +
> +		if (++idx == ps->nengines)
> +			idx = 0;
> +	} while (!__igt_timeout(end_time, NULL));
> +	i915_request_put(prev);
> +
> +	return err;
> +}
> +
> +static int s_many(void *arg)
> +{
> +	struct perf_series *ps = arg;
> +	IGT_TIMEOUT(end_time);
> +	unsigned int idx = 0;
> +
> +	GEM_BUG_ON(!ps->nengines);
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ps->ce[idx]);
> +		if (IS_ERR(rq))
> +			return PTR_ERR(rq);
> +
> +		i915_request_add(rq);
> +
> +		if (++idx == ps->nengines)
> +			idx = 0;
> +	} while (!__igt_timeout(end_time, NULL));
> +
> +	return 0;
> +}
> +
> +static int perf_series_engines(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	static int (* const func[])(void *arg) = {
> +		s_sync0,
> +		s_sync1,
> +		s_many,
> +		NULL,
> +	};
> +	const unsigned int nengines = num_uabi_engines(i915);
> +	struct intel_engine_cs *engine;
> +	int (* const *fn)(void *arg);
> +	struct pm_qos_request *qos;
> +	struct perf_stats *stats;
> +	struct perf_series *ps;
> +	unsigned int idx;
> +	int err = 0;
> +
> +	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
> +	if (!stats)
> +		return -ENOMEM;
> +
> +	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
> +	if (qos)
> +		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
> +
> +	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
> +	if (!ps) {
> +		kfree(stats);
> +		return -ENOMEM;

Leaks qos. Maybe just fail if the qos allocation fails, both for simplicity
and reproducibility.
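
Or, if keeping the optional qos, the ps failure path could unwind it too
(untested sketch, reusing only calls already in the patch):

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		if (qos) {
			pm_qos_remove_request(qos);
			kfree(qos);
		}
		kfree(stats);
		return -ENOMEM;
	}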

> +	}
> +
> +	ps->i915 = i915;
> +	ps->nengines = nengines;
> +
> +	idx = 0;
> +	for_each_uabi_engine(engine, i915) {
> +		struct intel_context *ce;
> +
> +		ce = intel_context_create(engine);
> +		if (IS_ERR(ce))
> +			goto out;
> +
> +		err = intel_context_pin(ce);
> +		if (err) {
> +			intel_context_put(ce);
> +			goto out;
> +		}
> +
> +		ps->ce[idx++] = ce;
> +	}
> +	GEM_BUG_ON(idx != ps->nengines);

After idx++ would be safer.

> +
> +	for (fn = func; *fn && !err; fn++) {
> +		char name[KSYM_NAME_LEN];
> +		struct igt_live_test t;
> +
> +		snprintf(name, sizeof(name), "%ps", *fn);
> +		err = igt_live_test_begin(&t, i915, __func__, name);
> +		if (err)
> +			break;
> +
> +		for (idx = 0; idx < nengines; idx++) {
> +			struct perf_stats *p =
> +				memset(&stats[idx], 0, sizeof(stats[idx]));
> +			struct intel_context *ce = ps->ce[idx];
> +
> +			p->engine = ps->ce[idx]->engine;
> +			intel_engine_pm_get(p->engine);
> +
> +			if (intel_engine_supports_stats(p->engine) &&
> +			    !intel_enable_engine_stats(p->engine))
> +				p->busy = intel_engine_get_busy_time(p->engine) + 1;
> +			p->runtime = -intel_context_get_total_runtime_ns(ce);
> +			p->time = ktime_get();
> +		}
> +
> +		err = (*fn)(ps);
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +
> +		for (idx = 0; idx < nengines; idx++) {
> +			struct perf_stats *p = &stats[idx];
> +			struct intel_context *ce = ps->ce[idx];
> +			int integer, decimal;
> +			u64 busy, dt;
> +
> +			p->time = ktime_sub(ktime_get(), p->time);
> +			if (p->busy) {
> +				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> +						    p->busy - 1);
> +				intel_disable_engine_stats(p->engine);
> +			}
> +
> +			err = switch_to_kernel_sync(ce, err);
> +			p->runtime += intel_context_get_total_runtime_ns(ce);
> +			intel_engine_pm_put(p->engine);
> +
> +			busy = 100 * ktime_to_ns(p->busy);
> +			dt = ktime_to_ns(p->time);
> +			if (dt) {
> +				integer = div64_u64(busy, dt);
> +				busy -= integer * dt;
> +				decimal = div64_u64(100 * busy, dt);
> +			} else {
> +				integer = 0;
> +				decimal = 0;
> +			}
> +
> +			pr_info("%s %5s: { busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
> +				name, p->engine->name, integer, decimal,
> +				div_u64(p->runtime, 1000 * 1000),
> +				div_u64(ktime_to_ns(p->time), 1000 * 1000));
> +		}
> +	}
> +
> +out:
> +	for (idx = 0; idx < nengines; idx++) {
> +		if (IS_ERR_OR_NULL(ps->ce[idx]))
> +			break;
> +
> +		intel_context_unpin(ps->ce[idx]);
> +		intel_context_put(ps->ce[idx]);
> +	}
> +	kfree(ps);
> +
> +	if (qos) {
> +		pm_qos_remove_request(qos);
> +		kfree(qos);
> +	}
> +	kfree(stats);
> +	return err;
> +}
> +
> +static int p_sync0(void *arg)
> +{
> +	struct perf_stats *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	bool busy;
> +	int err = 0;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	p->time = ktime_get();
> +	count = 0;
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +
> +		err = 0;
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0)
> +			err = -ETIME;
> +		i915_request_put(rq);
> +		if (err)
> +			break;
> +
> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));

Couldn't you use s_sync0 (etc) for the test bodies here?
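
For the request loop itself, something like this could share the body
(untested sketch; the p_ variants would still need their own count,
time and busy bookkeeping around it):

	struct perf_series *ps;

	ps = kzalloc(struct_size(ps, ce, 1), GFP_KERNEL);
	if (!ps)
		return -ENOMEM;

	ps->i915 = engine->i915;
	ps->nengines = 1;
	ps->ce[0] = ce;

	err = s_sync0(ps); /* same create/wait loop, single engine */
	kfree(ps);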

> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +static int p_sync1(void *arg)
> +{
> +	struct perf_stats *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct i915_request *prev = NULL;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	bool busy;
> +	int err = 0;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	p->time = ktime_get();
> +	count = 0;
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +
> +		err = 0;
> +		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
> +			err = -ETIME;
> +		i915_request_put(prev);
> +		prev = rq;
> +		if (err)
> +			break;
> +
> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +	i915_request_put(prev);
> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);

Setup and teardown code is also common in all p_ tests.
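
E.g. (untested sketch, helper names hypothetical):

	static struct intel_context *p_setup(struct intel_engine_cs *engine)
	{
		struct intel_context *ce;
		int err;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return ce;

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			return ERR_PTR(err);
		}

		return ce;
	}

	static int p_teardown(struct intel_context *ce,
			      struct perf_stats *p, int err)
	{
		err = switch_to_kernel_sync(ce, err);
		p->runtime = intel_context_get_total_runtime_ns(ce);

		intel_context_unpin(ce);
		intel_context_put(ce);
		return err;
	}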

> +	return err;
> +}
> +
> +static int p_many(void *arg)
> +{
> +	struct perf_stats *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	int err = 0;
> +	bool busy;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	count = 0;
> +	p->time = ktime_get();
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_add(rq);
> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +static int perf_parallel_engines(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	static int (* const func[])(void *arg) = {
> +		p_sync0,
> +		p_sync1,
> +		p_many,
> +		NULL,
> +	};
> +	const unsigned int nengines = num_uabi_engines(i915);
> +	struct intel_engine_cs *engine;
> +	int (* const *fn)(void *arg);
> +	struct pm_qos_request *qos;
> +	struct {
> +		struct perf_stats p;
> +		struct task_struct *tsk;
> +	} *engines;
> +	int err = 0;
> +
> +	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
> +	if (!engines)
> +		return -ENOMEM;
> +
> +	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
> +	if (qos)
> +		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
> +
> +	for (fn = func; *fn; fn++) {
> +		char name[KSYM_NAME_LEN];
> +		struct igt_live_test t;
> +		unsigned int idx;
> +
> +		snprintf(name, sizeof(name), "%ps", *fn);
> +		err = igt_live_test_begin(&t, i915, __func__, name);
> +		if (err)
> +			break;
> +
> +		atomic_set(&i915->selftest.counter, nengines);
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			intel_engine_pm_get(engine);
> +
> +			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
> +			engines[idx].p.engine = engine;
> +
> +			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
> +						       "igt:%s", engine->name);
> +			if (IS_ERR(engines[idx].tsk)) {
> +				err = PTR_ERR(engines[idx].tsk);
> +				intel_engine_pm_put(engine);
> +				break;
> +			}
> +			get_task_struct(engines[idx++].tsk);
> +		}
> +
> +		yield(); /* start all threads before we kthread_stop() */
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			int status;
> +
> +			if (IS_ERR(engines[idx].tsk))
> +				break;
> +
> +			status = kthread_stop(engines[idx].tsk);
> +			if (status && !err)
> +				err = status;
> +
> +			intel_engine_pm_put(engine);
> +			put_task_struct(engines[idx++].tsk);
> +		}
> +
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		if (err)
> +			break;
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			struct perf_stats *p = &engines[idx].p;
> +			u64 busy = 100 * ktime_to_ns(p->busy);
> +			u64 dt = ktime_to_ns(p->time);
> +			int integer, decimal;
> +
> +			if (dt) {
> +				integer = div64_u64(busy, dt);
> +				busy -= integer * dt;
> +				decimal = div64_u64(100 * busy, dt);
> +			} else {
> +				integer = 0;
> +				decimal = 0;
> +			}

This could also go to common like perf_stats_to_integer(p, &i, &d).
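
E.g. (sketch, lifting the computation verbatim into the suggested helper):

	static void perf_stats_to_integer(const struct perf_stats *p,
					  int *integer, int *decimal)
	{
		u64 busy = 100 * ktime_to_ns(p->busy);
		u64 dt = ktime_to_ns(p->time);

		if (dt) {
			*integer = div64_u64(busy, dt);
			busy -= *integer * dt;
			*decimal = div64_u64(100 * busy, dt);
		} else {
			*integer = 0;
			*decimal = 0;
		}
	}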

> +
> +			GEM_BUG_ON(engine != p->engine);
> +			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
> +				name, engine->name, p->count, integer, decimal,
> +				div_u64(p->runtime, 1000 * 1000),
> +				div_u64(ktime_to_ns(p->time), 1000 * 1000));
> +			idx++;
> +		}
> +	}
> +
> +	if (qos) {
> +		pm_qos_remove_request(qos);
> +		kfree(qos);
> +	}
> +	kfree(engines);
> +	return err;
> +}
> +
> +int i915_request_perf_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(perf_series_engines),
> +		SUBTEST(perf_parallel_engines),
> +	};
> +
> +	if (intel_gt_is_wedged(&i915->gt))
> +		return 0;
> +
> +	return i915_subtests(tests, i915);
> +}
> 

Mostly okay, just a little bit too much copy & paste.

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-23 14:18       ` Andi Shyti
@ 2020-04-23 14:59         ` Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2020-04-23 14:59 UTC (permalink / raw)
  To: Andi Shyti; +Cc: intel-gfx

Quoting Andi Shyti (2020-04-23 15:18:47)
> Hi Chris,
> 
> > > > +static int s_many(void *arg)
> > > > +{
> > > > +     struct perf_series *ps = arg;
> > > 
> > > why do we need to go through void... all functions are taking a
> > > perf_series structure.
> > 
> > The kthread API defines the function pointer as int (*fn)(void *arg);
> 
> In the parallel test, not in the serial test, right? But OK,
> it's not a big deal.

Imagine I copy-paste the kthread_run as well :)
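
i.e. the same int (*fn)(void *) signature serves both callers (sketch,
lines as in the patch):

	err = (*fn)(ps);	/* series: direct call */

	engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
				       "igt:%s", engine->name); /* parallel */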

> > > > +                     p->time = ktime_sub(ktime_get(), p->time);
> > > > +                     if (p->busy) {
> > > > +                             p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> > > > +                                                 p->busy - 1);
> > > > +                             intel_disable_engine_stats(p->engine);
> > > > +                     }
> > > > +
> > > > +                     err = switch_to_kernel_sync(ce, err);
> > > 
> > > how about err?
> > 
> > It's a case where we need to flush all the engines regardless of the
> > error, so we wait until the end.
> 
> so you put it there also to remove the warning :)

The idea was that it would keep the error until the end, trying to flush
as it went.

> > > > +                     p->runtime += intel_context_get_total_runtime_ns(ce);
> > > 
> > > assigning a negative value to an unsigned so that you can add it
> > > later? looks nice but odd :)
> > > 
> > > It's easier to understand if we do
> > > 
> > > p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;
> > > 
> > > if you like it, no need to change, though.
> > 
> > I've done both. I'm fond of the unsigned addition of a negative number.
> > It's a few instructions less. :-p
> 
> Yes, nevertheless, I like it, even though it's somehow an abuse
> of the concept of unsigned.

It's not negative, just a large positive number!
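
Unsigned arithmetic is modulo 2^64, so the sum comes out as the delta
(illustrative values):

	u64 start = 100, end = 250, delta;

	delta = -start;	/* stores 2^64 - 100 */
	delta += end;	/* (2^64 - 100 + 250) mod 2^64 == 150 == end - start */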

> > > > +     count = 0;
> > > > +     p->time = ktime_get();
> > > > +     do {
> > > > +             struct i915_request *rq;
> > > > +
> > > > +             rq = i915_request_create(ce);
> > > > +             if (IS_ERR(rq)) {
> > > > +                     err = PTR_ERR(rq);
> > > > +                     break;
> > > > +             }
> > > > +
> > > > +             i915_request_add(rq);
> > > 
> > > do we need a request_put as well?
> > 
> > No. Nasty API, add consumes the reference. It's on the list to update
> > now that we have a ~majority of cases that want to keep a reference to
> > their request.
> 
> arrghh!
> 
> > > > +             for_each_uabi_engine(engine, i915) {
> > > > +                     int status;
> > > > +
> > > > +                     if (IS_ERR(engines[idx].tsk))
> > > > +                             break;
> > > 
> > > if we break here aren't we missing engine_pm_put and put_task?
> > 
> > No. We broke in the earlier loop as well, so the ERR_PTR is the
> > sentinel, indicating the end of the assignments.
> 
> Oh yes, true!
> 
> Reviewed-by: Andi Shyti <andi.shyti@intel.com>
> 
> Now, after a month of it lingering around, you can
> finally push it.

And get back to the real question of how we can improve the backend so
that we can saturate the GPU with the least number of CPUs.

And if we are happy with the kernel, there's the issue of where all the
throughput goes between the kernel and userspace. (The purpose of this
test was to ensure we had no silly bugs inside the kernel that were
causing the sub-par userspace performance. We don't seem to, but it's
also not perfect, plenty of room for improvement.)
-Chris

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-23  6:52     ` Chris Wilson
@ 2020-04-23 14:18       ` Andi Shyti
  2020-04-23 14:59         ` Chris Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Andi Shyti @ 2020-04-23 14:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

> > > +static int s_many(void *arg)
> > > +{
> > > +     struct perf_series *ps = arg;
> > 
> > why do we need to go through void... all functions are taking a
> > perf_series structure.
> 
> The kthread API defines the function pointer as int (*fn)(void *arg);

In the parallel test, not in the serial test, right? But OK,
it's not a big deal.

> > > +                     p->time = ktime_sub(ktime_get(), p->time);
> > > +                     if (p->busy) {
> > > +                             p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> > > +                                                 p->busy - 1);
> > > +                             intel_disable_engine_stats(p->engine);
> > > +                     }
> > > +
> > > +                     err = switch_to_kernel_sync(ce, err);
> > 
> > how about err?
> 
> It's a case where we need to flush all the engines regardless of the
> error, so we wait until the end.

so you put it there also to remove the warning :)

> > > +                     p->runtime += intel_context_get_total_runtime_ns(ce);
> > 
> > assigning a negative value to an unsigned so that you can add it
> > later? looks nice but odd :)
> > 
> > It's easier to understand if we do
> > 
> > p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;
> > 
> > if you like it, no need to change, though.
> 
> I've done both. I'm fond of the unsigned addition of a negative number.
> It's a few instructions fewer. :-p

Yes; nevertheless I like it, even though it's somewhat of an abuse
of the concept of unsigned.

> > > +     count = 0;
> > > +     p->time = ktime_get();
> > > +     do {
> > > +             struct i915_request *rq;
> > > +
> > > +             rq = i915_request_create(ce);
> > > +             if (IS_ERR(rq)) {
> > > +                     err = PTR_ERR(rq);
> > > +                     break;
> > > +             }
> > > +
> > > +             i915_request_add(rq);
> > 
> > do we need a request_put as well?
> 
> No. Nasty API, add consumes the reference. It's on the list to update
> now that we have a ~majority of cases that want to keep a reference to
> their request.

arrghh!

> > > +             for_each_uabi_engine(engine, i915) {
> > > +                     int status;
> > > +
> > > +                     if (IS_ERR(engines[idx].tsk))
> > > +                             break;
> > 
> > if we break here aren't we missing engine_pm_put and put_task?
> 
> No. We broke in the earlier loop as well, so the ERR_PTR is the
> sentinel, indicating the end of the assignments.

Oh yes, true!

Reviewed-by: Andi Shyti <andi.shyti@intel.com>

Now, after a month of it lingering around, you can
finally push it.

Thanks,
Andi

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-22 22:45   ` Andi Shyti
@ 2020-04-23  6:52     ` Chris Wilson
  2020-04-23 14:18       ` Andi Shyti
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2020-04-23  6:52 UTC (permalink / raw)
  To: Andi Shyti; +Cc: intel-gfx

Quoting Andi Shyti (2020-04-22 23:45:29)
> Hi Chris,
> 
> [...]
> 
> > +static int s_many(void *arg)
> > +{
> > +     struct perf_series *ps = arg;
> 
> why do we need to go through void...? All the functions take a
> perf_series structure.

The kthread API defines the function pointer as int (*fn)(void *arg);
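
For reference, kthread_run() boils down to (a simplified sketch of the
interface in <linux/kthread.h>, which is really a macro over
kthread_create()):

	struct task_struct *kthread_run(int (*threadfn)(void *data),
					void *data,
					const char *namefmt, ...);

so every thread entry point has to funnel its argument through void *,
whatever struct it actually wants.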

> 
> > +     IGT_TIMEOUT(end_time);
> > +     unsigned int idx = 0;
> > +
> 
> [...]
> 
> > +             for (idx = 0; idx < nengines; idx++) {
> > +                     struct perf_stats *p =
> > +                             memset(&stats[idx], 0, sizeof(stats[idx]));
> > +                     struct intel_context *ce = ps->ce[idx];
> > +
> > +                     p->engine = ps->ce[idx]->engine;
> > +                     intel_engine_pm_get(p->engine);
> > +
> > +                     if (intel_engine_supports_stats(p->engine) &&
> > +                         !intel_enable_engine_stats(p->engine))
> > +                             p->busy = intel_engine_get_busy_time(p->engine) + 1;
> > +                     p->runtime = -intel_context_get_total_runtime_ns(ce);
> > +                     p->time = ktime_get();
> > +             }
> > +
> > +             err = (*fn)(ps);
> > +             if (igt_live_test_end(&t))
> > +                     err = -EIO;
> > +
> > +             for (idx = 0; idx < nengines; idx++) {
> > +                     struct perf_stats *p = &stats[idx];
> > +                     struct intel_context *ce = ps->ce[idx];
> > +                     int integer, decimal;
> > +                     u64 busy, dt;
> > +
> > +                     p->time = ktime_sub(ktime_get(), p->time);
> > +                     if (p->busy) {
> > +                             p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> > +                                                 p->busy - 1);
> > +                             intel_disable_engine_stats(p->engine);
> > +                     }
> > +
> > +                     err = switch_to_kernel_sync(ce, err);
> 
> how about err?

It's a case where we need to flush all the engines regardless of the
error, so we wait until the end.
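
As a sketch of the pattern (not the selftest code verbatim): thread err
through the cleanup so every engine is still flushed, and only the
first failure is reported:

	int err = 0;
	unsigned int idx;

	for (idx = 0; idx < nengines; idx++)
		err = switch_to_kernel_sync(ps->ce[idx], err);

	return err;

switch_to_kernel_sync() only sets -ETIME while err is still 0, so a
timeout never masks an earlier error.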

> > +                     p->runtime += intel_context_get_total_runtime_ns(ce);
> 
> assigning a negative value to an unsigned so that you can add it
> later? looks nice but odd :)
> 
> It's easier to understand if we do
> 
> p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;
> 
> if you like it, no need to change, though.

I've done both. I'm fond of the unsigned addition of a negative number.
It's a few instructions fewer. :-p

> 
> [...]
> 
> > +static int p_many(void *arg)
> > +{
> > +     struct perf_stats *p = arg;
> > +     struct intel_engine_cs *engine = p->engine;
> > +     struct intel_context *ce;
> > +     IGT_TIMEOUT(end_time);
> > +     unsigned long count;
> > +     int err = 0;
> > +     bool busy;
> > +
> > +     ce = intel_context_create(engine);
> > +     if (IS_ERR(ce))
> > +             return PTR_ERR(ce);
> > +
> > +     err = intel_context_pin(ce);
> > +     if (err) {
> > +             intel_context_put(ce);
> > +             return err;
> > +     }
> > +
> > +     busy = false;
> > +     if (intel_engine_supports_stats(engine) &&
> > +         !intel_enable_engine_stats(engine)) {
> > +             p->busy = intel_engine_get_busy_time(engine);
> > +             busy = true;
> > +     }
> > +
> > +     count = 0;
> > +     p->time = ktime_get();
> > +     do {
> > +             struct i915_request *rq;
> > +
> > +             rq = i915_request_create(ce);
> > +             if (IS_ERR(rq)) {
> > +                     err = PTR_ERR(rq);
> > +                     break;
> > +             }
> > +
> > +             i915_request_add(rq);
> 
> do we need a request_put as well?

No. Nasty API, add consumes the reference. It's on the list to update
now that we have a ~majority of cases that want to keep a reference to
their request.
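
The two usage patterns side by side (a sketch):

	rq = i915_request_create(ce);
	i915_request_add(rq);		/* add consumes our reference */

versus keeping the request alive past submission:

	rq = i915_request_create(ce);
	i915_request_get(rq);		/* take an extra reference */
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -ETIME;
	i915_request_put(rq);		/* drop our extra reference */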


> 
> > +             count++;
> > +     } while (!__igt_timeout(end_time, NULL));
> > +     p->time = ktime_sub(ktime_get(), p->time);
> > +
> > +     if (busy) {
> > +             p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> > +                                 p->busy);
> > +             intel_disable_engine_stats(engine);
> > +     }
> > +
> > +     err = switch_to_kernel_sync(ce, err);
> > +     p->runtime = intel_context_get_total_runtime_ns(ce);
> > +     p->count = count;
> > +
> > +     intel_context_unpin(ce);
> > +     intel_context_put(ce);
> > +     return err;
> > +}
> 
> [...]
> 
> > +             for_each_uabi_engine(engine, i915) {
> > +                     int status;
> > +
> > +                     if (IS_ERR(engines[idx].tsk))
> > +                             break;
> 
> if we break here aren't we missing engine_pm_put and put_task?

No. We broke in the earlier loop as well, so the ERR_PTR is the
sentinel, indicating the end of the assignments.
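
That is, setup and teardown walk the array in lockstep (a sketch):

	idx = 0;
	for_each_uabi_engine(engine, i915) {	/* setup */
		intel_engine_pm_get(engine);
		engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
					       "igt:%s", engine->name);
		if (IS_ERR(engines[idx].tsk)) {
			intel_engine_pm_put(engine);
			break;	/* the ERR_PTR remains as a sentinel */
		}
		get_task_struct(engines[idx++].tsk);
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {	/* teardown */
		if (IS_ERR(engines[idx].tsk))
			break;	/* setup stopped here; nothing to undo */
		kthread_stop(engines[idx].tsk);
		intel_engine_pm_put(engine);
		put_task_struct(engines[idx++].tsk);
	}
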
-Chris

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-22  7:42 ` [Intel-gfx] [PATCH] " Chris Wilson
@ 2020-04-22 22:45   ` Andi Shyti
  2020-04-23  6:52     ` Chris Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Andi Shyti @ 2020-04-22 22:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

[...]

> +static int s_many(void *arg)
> +{
> +	struct perf_series *ps = arg;

why do we need to go through void...? All the functions take a
perf_series structure.

> +	IGT_TIMEOUT(end_time);
> +	unsigned int idx = 0;
> +

[...]

> +		for (idx = 0; idx < nengines; idx++) {
> +			struct perf_stats *p =
> +				memset(&stats[idx], 0, sizeof(stats[idx]));
> +			struct intel_context *ce = ps->ce[idx];
> +
> +			p->engine = ps->ce[idx]->engine;
> +			intel_engine_pm_get(p->engine);
> +
> +			if (intel_engine_supports_stats(p->engine) &&
> +			    !intel_enable_engine_stats(p->engine))
> +				p->busy = intel_engine_get_busy_time(p->engine) + 1;
> +			p->runtime = -intel_context_get_total_runtime_ns(ce);
> +			p->time = ktime_get();
> +		}
> +
> +		err = (*fn)(ps);
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +
> +		for (idx = 0; idx < nengines; idx++) {
> +			struct perf_stats *p = &stats[idx];
> +			struct intel_context *ce = ps->ce[idx];
> +			int integer, decimal;
> +			u64 busy, dt;
> +
> +			p->time = ktime_sub(ktime_get(), p->time);
> +			if (p->busy) {
> +				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> +						    p->busy - 1);
> +				intel_disable_engine_stats(p->engine);
> +			}
> +
> +			err = switch_to_kernel_sync(ce, err);

how about err?

> +			p->runtime += intel_context_get_total_runtime_ns(ce);

assigning a negative value to an unsigned so that you can add it
later? looks nice but odd :)

It's easier to understand if we do

p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;

if you like it, no need to change, though.

[...]

> +static int p_many(void *arg)
> +{
> +	struct perf_stats *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	int err = 0;
> +	bool busy;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	count = 0;
> +	p->time = ktime_get();
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_add(rq);

do we need a request_put as well?

> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);
> +	return err;
> +}

[...]

> +		for_each_uabi_engine(engine, i915) {
> +			int status;
> +
> +			if (IS_ERR(engines[idx].tsk))
> +				break;

if we break here aren't we missing engine_pm_put and put_task?

Andi

> +
> +			status = kthread_stop(engines[idx].tsk);
> +			if (status && !err)
> +				err = status;
> +
> +			intel_engine_pm_put(engine);
> +			put_task_struct(engines[idx++].tsk);
> +		}
> +
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		if (err)
> +			break;
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			struct perf_stats *p = &engines[idx].p;
> +			u64 busy = 100 * ktime_to_ns(p->busy);
> +			u64 dt = ktime_to_ns(p->time);
> +			int integer, decimal;
> +
> +			if (dt) {
> +				integer = div64_u64(busy, dt);
> +				busy -= integer * dt;
> +				decimal = div64_u64(100 * busy, dt);
> +			} else {
> +				integer = 0;
> +				decimal = 0;
> +			}
> +
> +			GEM_BUG_ON(engine != p->engine);
> +			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
> +				name, engine->name, p->count, integer, decimal,
> +				div_u64(p->runtime, 1000 * 1000),
> +				div_u64(ktime_to_ns(p->time), 1000 * 1000));
> +			idx++;
> +		}
> +	}
> +
> +	cpu_latency_qos_remove_request(&qos);
> +	kfree(engines);
> +	return err;
> +}

* [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-22  7:37 [Intel-gfx] [CI] " Chris Wilson
@ 2020-04-22  7:42 ` Chris Wilson
  2020-04-22 22:45   ` Andi Shyti
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2020-04-22  7:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 580 +++++++++++++++++-
 2 files changed, 580 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1dab0360f76a..3b319c0953cb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%ps", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1476,3 +1477,580 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+struct perf_stats {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+struct perf_series {
+	struct drm_i915_private *i915;
+	unsigned int nengines;
+	struct intel_context *ce[];
+};
+
+static int s_sync0(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+static int s_sync1(void *arg)
+{
+	struct perf_series *ps = arg;
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+
+	return err;
+}
+
+static int s_many(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_add(rq);
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			goto out;
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, ce->timeline->seqno,
+				integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	cpu_latency_qos_remove_request(&qos);
+	kfree(stats);
+	return err;
+}
+
+static int p_sync0(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_sync1(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct i915_request *prev = NULL;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_many(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	cpu_latency_qos_add_request(&qos, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	cpu_latency_qos_remove_request(&qos);
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1


* [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
@ 2020-04-16 17:46 Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2020-04-16 17:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 590 +++++++++++++++++-
 2 files changed, 590 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1dab0360f76a..750ced92141b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%ps", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1476,3 +1477,590 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+struct perf_stats {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+struct perf_series {
+	struct drm_i915_private *i915;
+	unsigned int nengines;
+	struct intel_context *ce[];
+};
+
+static int s_sync0(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+static int s_sync1(void *arg)
+{
+	struct perf_series *ps = arg;
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+
+	return err;
+}
+
+static int s_many(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_add(rq);
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		cpu_latency_qos_add_request(qos, 0);
+
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			goto out;
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, ce->timeline->seqno,
+				integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	if (qos) {
+		cpu_latency_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(stats);
+	return err;
+}
+
+static int p_sync0(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_sync1(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct i915_request *prev = NULL;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_many(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		cpu_latency_qos_add_request(qos, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	if (qos) {
+		cpu_latency_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1


* [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
@ 2020-02-25 21:46 Chris Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2020-02-25 21:46 UTC (permalink / raw)
  To: intel-gfx

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 280 +++++++++++++++++-
 2 files changed, 280 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index f89d9c42f1fa..91f67995f0ac 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%pS", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1470,3 +1471,280 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+struct perf_parallel {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+static int perf_sync(void *arg)
+{
+	struct perf_parallel *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_many(void *arg)
+{
+	struct perf_parallel *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		perf_sync,
+		perf_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct pm_qos_request qos = {};
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct {
+		struct perf_parallel p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	pm_qos_add_request(&qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_parallel *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	pm_qos_remove_request(&qos);
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.25.1
