All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [CI] drm/i915/selftests: Add request throughput measurement to perf
@ 2020-04-22  7:37 Chris Wilson
  2020-04-22  7:42 ` [Intel-gfx] [PATCH] " Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Chris Wilson @ 2020-04-22  7:37 UTC (permalink / raw)
  To: intel-gfx

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 590 +++++++++++++++++-
 2 files changed, 590 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1dab0360f76a..750ced92141b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%ps", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1476,3 +1477,590 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+struct perf_stats {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+struct perf_series {
+	struct drm_i915_private *i915;
+	unsigned int nengines;
+	struct intel_context *ce[];
+};
+
+static int s_sync0(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+static int s_sync1(void *arg)
+{
+	struct perf_series *ps = arg;
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+
+	return err;
+}
+
+static int s_many(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_add(rq);
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		cpu_latency_qos_add_request(qos, 0);
+
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			goto out;
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, ce->timeline->seqno,
+				integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	if (qos) {
+		cpu_latency_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(stats);
+	return err;
+}
+
+static int p_sync0(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_sync1(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct i915_request *prev = NULL;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_many(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		cpu_latency_qos_add_request(qos, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	if (qos) {
+		cpu_latency_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-22  7:37 [Intel-gfx] [CI] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
@ 2020-04-22  7:42 ` Chris Wilson
  2020-04-22 22:45   ` Andi Shyti
  2020-04-22  8:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/selftests: Add request throughput measurement to perf (rev5) Patchwork
  2020-04-22  8:41 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  2 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2020-04-22  7:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 580 +++++++++++++++++-
 2 files changed, 580 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1dab0360f76a..3b319c0953cb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%ps", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1476,3 +1477,580 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+struct perf_stats {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+struct perf_series {
+	struct drm_i915_private *i915;
+	unsigned int nengines;
+	struct intel_context *ce[];
+};
+
+static int s_sync0(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+static int s_sync1(void *arg)
+{
+	struct perf_series *ps = arg;
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+
+	return err;
+}
+
+static int s_many(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_add(rq);
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce))
+			goto out;
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, ce->timeline->seqno,
+				integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	cpu_latency_qos_remove_request(&qos);
+	kfree(stats);
+	return err;
+}
+
+static int p_sync0(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_sync1(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct i915_request *prev = NULL;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq;
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(prev);
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int p_many(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	cpu_latency_qos_add_request(&qos, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	cpu_latency_qos_remove_request(&qos);
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/selftests: Add request throughput measurement to perf (rev5)
  2020-04-22  7:37 [Intel-gfx] [CI] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
  2020-04-22  7:42 ` [Intel-gfx] [PATCH] " Chris Wilson
@ 2020-04-22  8:16 ` Patchwork
  2020-04-22  8:41 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2020-04-22  8:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/selftests: Add request throughput measurement to perf (rev5)
URL   : https://patchwork.freedesktop.org/series/73930/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
10bc6b3b5e80 drm/i915/selftests: Add request throughput measurement to perf
-:96: WARNING:LINE_SPACING: Missing a blank line after declarations
#96: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1525:
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);

-:130: WARNING:LINE_SPACING: Missing a blank line after declarations
#130: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1559:
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);

-:165: WARNING:LINE_SPACING: Missing a blank line after declarations
#165: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1594:
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);

-:188: WARNING:LINE_SPACING: Missing a blank line after declarations
#188: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1617:
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {

-:196: WARNING:LINE_SPACING: Missing a blank line after declarations
#196: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1625:
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);

-:320: WARNING:LINE_SPACING: Missing a blank line after declarations
#320: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1749:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:388: WARNING:LINE_SPACING: Missing a blank line after declarations
#388: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1817:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:457: WARNING:LINE_SPACING: Missing a blank line after declarations
#457: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1886:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:513: WARNING:LINE_SPACING: Missing a blank line after declarations
#513: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1942:
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {

-:521: WARNING:LINE_SPACING: Missing a blank line after declarations
#521: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1950:
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);

-:564: WARNING:YIELD: Using yield() is generally wrong. See yield() kernel-doc (sched/core.c)
#564: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1993:
+		yield(); /* start all threads before we kthread_stop() */

total: 0 errors, 11 warnings, 0 checks, 601 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915/selftests: Add request throughput measurement to perf (rev5)
  2020-04-22  7:37 [Intel-gfx] [CI] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
  2020-04-22  7:42 ` [Intel-gfx] [PATCH] " Chris Wilson
  2020-04-22  8:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/selftests: Add request throughput measurement to perf (rev5) Patchwork
@ 2020-04-22  8:41 ` Patchwork
  2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2020-04-22  8:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/selftests: Add request throughput measurement to perf (rev5)
URL   : https://patchwork.freedesktop.org/series/73930/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8347 -> Patchwork_17414
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_17414 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_17414, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17414/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17414:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@blt:
    - fi-elk-e7500:       [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8347/fi-elk-e7500/igt@i915_selftest@live@blt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17414/fi-elk-e7500/igt@i915_selftest@live@blt.html

  
Known issues
------------

  Here are the changes found in Patchwork_17414 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@late_gt_pm:
    - fi-snb-2600:        [PASS][3] -> [FAIL][4] ([i915#1763]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8347/fi-snb-2600/igt@i915_selftest@live@late_gt_pm.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17414/fi-snb-2600/igt@i915_selftest@live@late_gt_pm.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@execlists:
    - fi-icl-y:           [DMESG-FAIL][5] ([i915#1314]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8347/fi-icl-y/igt@i915_selftest@live@execlists.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17414/fi-icl-y/igt@i915_selftest@live@execlists.html

  
  [i915#1314]: https://gitlab.freedesktop.org/drm/intel/issues/1314
  [i915#1763]: https://gitlab.freedesktop.org/drm/intel/issues/1763


Participating hosts (49 -> 42)
------------------------------

  Missing    (7): fi-cml-u2 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-kbl-7560u fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8347 -> Patchwork_17414

  CI-20190529: 20190529
  CI_DRM_8347: 4acd2fd67fbcfe23e07130bab11a47d4b43d0bd4 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5604: 18cc19ece602ba552a8386222b49e7e82820f9aa @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17414: 10bc6b3b5e8038b75ddec2de376b11daee9f2e73 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

10bc6b3b5e80 drm/i915/selftests: Add request throughput measurement to perf

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17414/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-22  7:42 ` [Intel-gfx] [PATCH] " Chris Wilson
@ 2020-04-22 22:45   ` Andi Shyti
  2020-04-23  6:52     ` Chris Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Andi Shyti @ 2020-04-22 22:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

[...]

> +static int s_many(void *arg)
> +{
> +	struct perf_series *ps = arg;

why do we need to go through void... all functions are taking a
perf_series structure.

> +	IGT_TIMEOUT(end_time);
> +	unsigned int idx = 0;
> +

[...]

> +		for (idx = 0; idx < nengines; idx++) {
> +			struct perf_stats *p =
> +				memset(&stats[idx], 0, sizeof(stats[idx]));
> +			struct intel_context *ce = ps->ce[idx];
> +
> +			p->engine = ps->ce[idx]->engine;
> +			intel_engine_pm_get(p->engine);
> +
> +			if (intel_engine_supports_stats(p->engine) &&
> +			    !intel_enable_engine_stats(p->engine))
> +				p->busy = intel_engine_get_busy_time(p->engine) + 1;
> +			p->runtime = -intel_context_get_total_runtime_ns(ce);
> +			p->time = ktime_get();
> +		}
> +
> +		err = (*fn)(ps);
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +
> +		for (idx = 0; idx < nengines; idx++) {
> +			struct perf_stats *p = &stats[idx];
> +			struct intel_context *ce = ps->ce[idx];
> +			int integer, decimal;
> +			u64 busy, dt;
> +
> +			p->time = ktime_sub(ktime_get(), p->time);
> +			if (p->busy) {
> +				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> +						    p->busy - 1);
> +				intel_disable_engine_stats(p->engine);
> +			}
> +
> +			err = switch_to_kernel_sync(ce, err);

how about err?

> +			p->runtime += intel_context_get_total_runtime_ns(ce);

assigning a negative value to an unsigned so that you can add it
later? looks nice but odd :)

It's easier to understand if we do

p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;

if you like it, no need to change, though.

[...]

> +static int p_many(void *arg)
> +{
> +	struct perf_stats *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	int err = 0;
> +	bool busy;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	count = 0;
> +	p->time = ktime_get();
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_add(rq);

do we need a request_put as well?

> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);
> +	return err;
> +}

[...]

> +		for_each_uabi_engine(engine, i915) {
> +			int status;
> +
> +			if (IS_ERR(engines[idx].tsk))
> +				break;

if we break here aren't we missing engine_pm_put and put_task?

Andi

> +
> +			status = kthread_stop(engines[idx].tsk);
> +			if (status && !err)
> +				err = status;
> +
> +			intel_engine_pm_put(engine);
> +			put_task_struct(engines[idx++].tsk);
> +		}
> +
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		if (err)
> +			break;
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			struct perf_stats *p = &engines[idx].p;
> +			u64 busy = 100 * ktime_to_ns(p->busy);
> +			u64 dt = ktime_to_ns(p->time);
> +			int integer, decimal;
> +
> +			if (dt) {
> +				integer = div64_u64(busy, dt);
> +				busy -= integer * dt;
> +				decimal = div64_u64(100 * busy, dt);
> +			} else {
> +				integer = 0;
> +				decimal = 0;
> +			}
> +
> +			GEM_BUG_ON(engine != p->engine);
> +			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
> +				name, engine->name, p->count, integer, decimal,
> +				div_u64(p->runtime, 1000 * 1000),
> +				div_u64(ktime_to_ns(p->time), 1000 * 1000));
> +			idx++;
> +		}
> +	}
> +
> +	cpu_latency_qos_remove_request(&qos);
> +	kfree(engines);
> +	return err;
> +}
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-22 22:45   ` Andi Shyti
@ 2020-04-23  6:52     ` Chris Wilson
  2020-04-23 14:18       ` Andi Shyti
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2020-04-23  6:52 UTC (permalink / raw)
  To: Andi Shyti; +Cc: intel-gfx

Quoting Andi Shyti (2020-04-22 23:45:29)
> Hi Chris,
> 
> [...]
> 
> > +static int s_many(void *arg)
> > +{
> > +     struct perf_series *ps = arg;
> 
> why do we need to go through void... all functions are taking a
> perf_series structure.

The kthread API defines the function pointer as int (*fn)(void *arg);

> 
> > +     IGT_TIMEOUT(end_time);
> > +     unsigned int idx = 0;
> > +
> 
> [...]
> 
> > +             for (idx = 0; idx < nengines; idx++) {
> > +                     struct perf_stats *p =
> > +                             memset(&stats[idx], 0, sizeof(stats[idx]));
> > +                     struct intel_context *ce = ps->ce[idx];
> > +
> > +                     p->engine = ps->ce[idx]->engine;
> > +                     intel_engine_pm_get(p->engine);
> > +
> > +                     if (intel_engine_supports_stats(p->engine) &&
> > +                         !intel_enable_engine_stats(p->engine))
> > +                             p->busy = intel_engine_get_busy_time(p->engine) + 1;
> > +                     p->runtime = -intel_context_get_total_runtime_ns(ce);
> > +                     p->time = ktime_get();
> > +             }
> > +
> > +             err = (*fn)(ps);
> > +             if (igt_live_test_end(&t))
> > +                     err = -EIO;
> > +
> > +             for (idx = 0; idx < nengines; idx++) {
> > +                     struct perf_stats *p = &stats[idx];
> > +                     struct intel_context *ce = ps->ce[idx];
> > +                     int integer, decimal;
> > +                     u64 busy, dt;
> > +
> > +                     p->time = ktime_sub(ktime_get(), p->time);
> > +                     if (p->busy) {
> > +                             p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> > +                                                 p->busy - 1);
> > +                             intel_disable_engine_stats(p->engine);
> > +                     }
> > +
> > +                     err = switch_to_kernel_sync(ce, err);
> 
> how about err?

It's a case of where we need to flush all the engines regardless of the
error, so we wait until the end.

> > +                     p->runtime += intel_context_get_total_runtime_ns(ce);
> 
> assigning a negative value to an unsigned so that you can add it
> later? looks nice but odd :)
> 
> It's easier to understand if we do
> 
> p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;
> 
> if you like it, no need to change, though.

I've done both. I'm fond of the unsigned addition of a negative number.
It's a few instructions less. :-p

> 
> [...]
> 
> > +static int p_many(void *arg)
> > +{
> > +     struct perf_stats *p = arg;
> > +     struct intel_engine_cs *engine = p->engine;
> > +     struct intel_context *ce;
> > +     IGT_TIMEOUT(end_time);
> > +     unsigned long count;
> > +     int err = 0;
> > +     bool busy;
> > +
> > +     ce = intel_context_create(engine);
> > +     if (IS_ERR(ce))
> > +             return PTR_ERR(ce);
> > +
> > +     err = intel_context_pin(ce);
> > +     if (err) {
> > +             intel_context_put(ce);
> > +             return err;
> > +     }
> > +
> > +     busy = false;
> > +     if (intel_engine_supports_stats(engine) &&
> > +         !intel_enable_engine_stats(engine)) {
> > +             p->busy = intel_engine_get_busy_time(engine);
> > +             busy = true;
> > +     }
> > +
> > +     count = 0;
> > +     p->time = ktime_get();
> > +     do {
> > +             struct i915_request *rq;
> > +
> > +             rq = i915_request_create(ce);
> > +             if (IS_ERR(rq)) {
> > +                     err = PTR_ERR(rq);
> > +                     break;
> > +             }
> > +
> > +             i915_request_add(rq);
> 
> do we need a request_put as well?

No. Nasty API, add consumes the reference. It's on the list to update
now that we have a ~majority of cases that want to keep a reference to
their request.


> 
> > +             count++;
> > +     } while (!__igt_timeout(end_time, NULL));
> > +     p->time = ktime_sub(ktime_get(), p->time);
> > +
> > +     if (busy) {
> > +             p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> > +                                 p->busy);
> > +             intel_disable_engine_stats(engine);
> > +     }
> > +
> > +     err = switch_to_kernel_sync(ce, err);
> > +     p->runtime = intel_context_get_total_runtime_ns(ce);
> > +     p->count = count;
> > +
> > +     intel_context_unpin(ce);
> > +     intel_context_put(ce);
> > +     return err;
> > +}
> 
> [...]
> 
> > +             for_each_uabi_engine(engine, i915) {
> > +                     int status;
> > +
> > +                     if (IS_ERR(engines[idx].tsk))
> > +                             break;
> 
> if we break here aren't we missing engine_pm_put and put_task?

No. We broke in the earlier loop as well, so the ERR_PTR is the
sentinel, indicating the end of the assignments.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-23  6:52     ` Chris Wilson
@ 2020-04-23 14:18       ` Andi Shyti
  2020-04-23 14:59         ` Chris Wilson
  0 siblings, 1 reply; 8+ messages in thread
From: Andi Shyti @ 2020-04-23 14:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Hi Chris,

> > > +static int s_many(void *arg)
> > > +{
> > > +     struct perf_series *ps = arg;
> > 
> > why do we need to go through void... all functions are taking a
> > perf_series structure.
> 
> The kthread API defines the function pointer as int (*fn)(void *arg);

In the parallel tes, not in the serial test. right? But, OK,
it's not a big deal.

> > > +                     p->time = ktime_sub(ktime_get(), p->time);
> > > +                     if (p->busy) {
> > > +                             p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> > > +                                                 p->busy - 1);
> > > +                             intel_disable_engine_stats(p->engine);
> > > +                     }
> > > +
> > > +                     err = switch_to_kernel_sync(ce, err);
> > 
> > how about err?
> 
> It's a case of where we need to flush all the engines regardless of the
> error, so we wait until the end.

so you put it there to remove also the warning :)

> > > +                     p->runtime += intel_context_get_total_runtime_ns(ce);
> > 
> > assigning a negative value to an unsigned so that you can add it
> > later? looks nice but odd :)
> > 
> > It's easier to understand if we do
> > 
> > p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;
> > 
> > if you like it, no need to change, though.
> 
> I've done both. I'm fond of the unsigned addition of a negative number.
> It's a few instructions less. :-p

Yes, nevertheless, I like it, even though it's somehow an abuse
of the concept of unsigned.

> > > +     count = 0;
> > > +     p->time = ktime_get();
> > > +     do {
> > > +             struct i915_request *rq;
> > > +
> > > +             rq = i915_request_create(ce);
> > > +             if (IS_ERR(rq)) {
> > > +                     err = PTR_ERR(rq);
> > > +                     break;
> > > +             }
> > > +
> > > +             i915_request_add(rq);
> > 
> > do we need a request_put as well?
> 
> No. Nasty API, add consumes the reference. It's on the list to update
> now that we have a ~majority of cases that want to keep a reference to
> their request.

arrghh!

> > > +             for_each_uabi_engine(engine, i915) {
> > > +                     int status;
> > > +
> > > +                     if (IS_ERR(engines[idx].tsk))
> > > +                             break;
> > 
> > if we break here aren't we missing engine_pm_put and put_task?
> 
> No. We broke in the earlier loop as well, so the ERR_PTR is the
> sentinel, indicating the end of the assignments.

Oh yes, true!

Reviewed-by: Andi Shyti <andi.shyti@intel.com>

Now finally, after a month it has been lingering around, you can
finally push it.

Thanks,
Andi
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-23 14:18       ` Andi Shyti
@ 2020-04-23 14:59         ` Chris Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2020-04-23 14:59 UTC (permalink / raw)
  To: Andi Shyti; +Cc: intel-gfx

Quoting Andi Shyti (2020-04-23 15:18:47)
> Hi Chris,
> 
> > > > +static int s_many(void *arg)
> > > > +{
> > > > +     struct perf_series *ps = arg;
> > > 
> > > why do we need to go through void... all functions are taking a
> > > perf_series structure.
> > 
> > The kthread API defines the function pointer as int (*fn)(void *arg);
> 
> In the parallel tes, not in the serial test. right? But, OK,
> it's not a big deal.

Imagine I copy-paste the kthread_run as well :)

> > > > +                     p->time = ktime_sub(ktime_get(), p->time);
> > > > +                     if (p->busy) {
> > > > +                             p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
> > > > +                                                 p->busy - 1);
> > > > +                             intel_disable_engine_stats(p->engine);
> > > > +                     }
> > > > +
> > > > +                     err = switch_to_kernel_sync(ce, err);
> > > 
> > > how about err?
> > 
> > It's a case of where we need to flush all the engines regardless of the
> > error, so we wait until the end.
> 
> so you put it there to remove also the warning :)

The idea was that it would keep the error until the end, trying to flush
as it went.

> > > > +                     p->runtime += intel_context_get_total_runtime_ns(ce);
> > > 
> > > assigning a negative value to an unsigned so that you can add it
> > > later? looks nice but odd :)
> > > 
> > > It's easier to understand if we do
> > > 
> > > p->runtime = intel_context_get_total_runtime_ns(ce) - p->runtime;
> > > 
> > > if you like it, no need to change, though.
> > 
> > I've done both. I'm fond of the unsigned addition of a negative number.
> > It's a few instructions less. :-p
> 
> Yes, nevertheless, I like it, even though it's somehow an abuse
> of the concept of unsigned.

It's not negative, just a large positive number!

> > > > +     count = 0;
> > > > +     p->time = ktime_get();
> > > > +     do {
> > > > +             struct i915_request *rq;
> > > > +
> > > > +             rq = i915_request_create(ce);
> > > > +             if (IS_ERR(rq)) {
> > > > +                     err = PTR_ERR(rq);
> > > > +                     break;
> > > > +             }
> > > > +
> > > > +             i915_request_add(rq);
> > > 
> > > do we need a request_put as well?
> > 
> > No. Nasty API, add consumes the reference. It's on the list to update
> > now that we have a ~majority of cases that want to keep a reference to
> > their request.
> 
> arrghh!
> 
> > > > +             for_each_uabi_engine(engine, i915) {
> > > > +                     int status;
> > > > +
> > > > +                     if (IS_ERR(engines[idx].tsk))
> > > > +                             break;
> > > 
> > > if we break here aren't we missing engine_pm_put and put_task?
> > 
> > No. We broke in the earlier loop as well, so the ERR_PTR is the
> > sentinel, indicating the end of the assignments.
> 
> Oh yes, true!
> 
> Reviewed-by: Andi Shyti <andi.shyti@intel.com>
> 
> Now finally, after a month it has been lingering around, you can
> finally push it.

And get back to the real question of how we can improve the backend so
that we can saturate the GPU with the least number of CPUs.

And if we are happy with the kernel, there's the issue of where all the
throughput goes between the kernel and userspace. (The purpose of this
test was to ensure we had no silly bugs inside the kernel that was
causing the sub-par userspace performance. We don't seem to, but it's
also not perfect, plenty of room for improvement.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-04-23 20:16 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-22  7:37 [Intel-gfx] [CI] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
2020-04-22  7:42 ` [Intel-gfx] [PATCH] " Chris Wilson
2020-04-22 22:45   ` Andi Shyti
2020-04-23  6:52     ` Chris Wilson
2020-04-23 14:18       ` Andi Shyti
2020-04-23 14:59         ` Chris Wilson
2020-04-22  8:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/selftests: Add request throughput measurement to perf (rev5) Patchwork
2020-04-22  8:41 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.