From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf
Date: Tue, 10 Mar 2020 10:38:21 +0000 [thread overview]
Message-ID: <e5360a24-91e9-a873-5a9b-5b6e5cca59ac@linux.intel.com> (raw)
In-Reply-To: <20200306133852.3420322-8-chris@chris-wilson.co.uk>
On 06/03/2020 13:38, Chris Wilson wrote:
> Under ideal circumstances, the driver should be able to keep the GPU
> fully saturated with work. Measure how close to ideal we get under the
> harshest of conditions with no user payload.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> .../drm/i915/selftests/i915_perf_selftests.h | 1 +
> drivers/gpu/drm/i915/selftests/i915_request.c | 285 +++++++++++++++++-
> 2 files changed, 285 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> index 3bf7f53e9924..d8da142985eb 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> @@ -16,5 +16,6 @@
> * Tests are executed in order by igt/i915_selftest
> */
> selftest(engine_cs, intel_engine_cs_perf_selftests)
> +selftest(request, i915_request_perf_selftests)
> selftest(blt, i915_gem_object_blt_perf_selftests)
> selftest(region, intel_memory_region_perf_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index f89d9c42f1fa..d4c088cfe4e1 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -23,6 +23,7 @@
> */
>
> #include <linux/prime_numbers.h>
> +#include <linux/pm_qos.h>
>
> #include "gem/i915_gem_pm.h"
> #include "gem/selftests/mock_context.h"
> @@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg)
> struct igt_live_test t;
> unsigned int idx;
>
> - snprintf(name, sizeof(name), "%pS", fn);
> + snprintf(name, sizeof(name), "%ps", *fn);
> err = igt_live_test_begin(&t, i915, __func__, name);
> if (err)
> break;
> @@ -1470,3 +1471,285 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
>
> return i915_subtests(tests, i915);
> }
> +
> +struct perf_parallel {
> + struct intel_engine_cs *engine;
> + unsigned long count;
> + ktime_t time;
> + ktime_t busy;
> + u64 runtime;
> +};
> +
> +static int switch_to_kernel_sync(struct intel_context *ce, int err)
> +{
> + struct i915_request *rq;
> + struct dma_fence *fence;
> +
> + rq = intel_engine_create_kernel_request(ce->engine);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + fence = i915_active_fence_get(&ce->timeline->last_request);
> + if (fence) {
> + i915_request_await_dma_fence(rq, fence);
> + dma_fence_put(fence);
> + }
> +
> + rq = i915_request_get(rq);
> + i915_request_add(rq);
> + if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
> + err = -ETIME;
> + i915_request_put(rq);
> +
> + while (!err && !intel_engine_is_idle(ce->engine))
> + intel_engine_flush_submission(ce->engine);
> +
> + return err;
> +}
> +
> +static int perf_sync(void *arg)
> +{
> + struct perf_parallel *p = arg;
> + struct intel_engine_cs *engine = p->engine;
> + struct intel_context *ce;
> + IGT_TIMEOUT(end_time);
> + unsigned long count;
> + bool busy;
> + int err = 0;
> +
> + ce = intel_context_create(engine);
> + if (IS_ERR(ce))
> + return PTR_ERR(ce);
> +
> + err = intel_context_pin(ce);
> + if (err) {
> + intel_context_put(ce);
> + return err;
> + }
> +
> + busy = false;
> + if (intel_engine_supports_stats(engine) &&
> + !intel_enable_engine_stats(engine)) {
> + p->busy = intel_engine_get_busy_time(engine);
> + busy = true;
> + }
> +
> + p->time = ktime_get();
> + count = 0;
> + do {
> + struct i915_request *rq;
> +
> + rq = i915_request_create(ce);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + break;
> + }
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> +
> + err = 0;
> + if (i915_request_wait(rq, 0, HZ / 5) < 0)
> + err = -ETIME;
> + i915_request_put(rq);
> + if (err)
> + break;
> +
> + count++;
> + } while (!__igt_timeout(end_time, NULL));
> + p->time = ktime_sub(ktime_get(), p->time);
> +
> + if (busy) {
> + p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> + p->busy);
> + intel_disable_engine_stats(engine);
> + }
> +
> + err = switch_to_kernel_sync(ce, err);
> + p->runtime = intel_context_get_total_runtime_ns(ce);
> + p->count = count;
> +
> + intel_context_unpin(ce);
> + intel_context_put(ce);
> + return err;
> +}
> +
> +static int perf_many(void *arg)
> +{
> + struct perf_parallel *p = arg;
> + struct intel_engine_cs *engine = p->engine;
> + struct intel_context *ce;
> + IGT_TIMEOUT(end_time);
> + unsigned long count;
> + int err = 0;
> + bool busy;
> +
> + ce = intel_context_create(engine);
> + if (IS_ERR(ce))
> + return PTR_ERR(ce);
> +
> + err = intel_context_pin(ce);
> + if (err) {
> + intel_context_put(ce);
> + return err;
> + }
> +
> + busy = false;
> + if (intel_engine_supports_stats(engine) &&
> + !intel_enable_engine_stats(engine)) {
> + p->busy = intel_engine_get_busy_time(engine);
> + busy = true;
> + }
> +
> + count = 0;
> + p->time = ktime_get();
> + do {
> + struct i915_request *rq;
> +
> + rq = i915_request_create(ce);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + break;
> + }
> +
> + i915_request_add(rq);
Any concerns about the ring size here, and should we maybe manage the wait explicitly?
> + count++;
> + } while (!__igt_timeout(end_time, NULL));
> + p->time = ktime_sub(ktime_get(), p->time);
> +
> + if (busy) {
> + p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> + p->busy);
> + intel_disable_engine_stats(engine);
> + }
> +
> + err = switch_to_kernel_sync(ce, err);
> + p->runtime = intel_context_get_total_runtime_ns(ce);
> + p->count = count;
> +
> + intel_context_unpin(ce);
> + intel_context_put(ce);
> + return err;
> +}
> +
> +static int perf_parallel_engines(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + static int (* const func[])(void *arg) = {
> + perf_sync,
> + perf_many,
> + NULL,
> + };
> + const unsigned int nengines = num_uabi_engines(i915);
> + struct intel_engine_cs *engine;
> + int (* const *fn)(void *arg);
> + struct pm_qos_request *qos;
> + struct {
> + struct perf_parallel p;
> + struct task_struct *tsk;
> + } *engines;
> + int err = 0;
> +
> + engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
> + if (!engines)
> + return -ENOMEM;
> +
> + qos = kzalloc(sizeof(*qos), GFP_KERNEL);
> + if (qos)
> + pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
> +
> + for (fn = func; *fn; fn++) {
> + char name[KSYM_NAME_LEN];
> + struct igt_live_test t;
> + unsigned int idx;
> +
> + snprintf(name, sizeof(name), "%ps", *fn);
Is this any better than just storing the name in a local static array?
> + err = igt_live_test_begin(&t, i915, __func__, name);
> + if (err)
> + break;
> +
> + atomic_set(&i915->selftest.counter, nengines);
> +
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
For a pure driver overhead test I would suggest making this a gt live test.
> + intel_engine_pm_get(engine);
> +
> + memset(&engines[idx].p, 0, sizeof(engines[idx].p));
> + engines[idx].p.engine = engine;
> +
> + engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
> + "igt:%s", engine->name);
The test will be affected by the host CPU core count. How about we only
measure num_cpu engines? That might be even more important with discrete GPUs.
> + if (IS_ERR(engines[idx].tsk)) {
> + err = PTR_ERR(engines[idx].tsk);
> + intel_engine_pm_put(engine);
> + break;
> + }
> + get_task_struct(engines[idx++].tsk);
> + }
> +
> + yield(); /* start all threads before we kthread_stop() */
> +
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + int status;
> +
> + if (IS_ERR(engines[idx].tsk))
> + break;
> +
> + status = kthread_stop(engines[idx].tsk);
> + if (status && !err)
> + err = status;
> +
> + intel_engine_pm_put(engine);
> + put_task_struct(engines[idx++].tsk);
> + }
> +
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + if (err)
> + break;
> +
> + idx = 0;
> + for_each_uabi_engine(engine, i915) {
> + struct perf_parallel *p = &engines[idx].p;
> + u64 busy = 100 * ktime_to_ns(p->busy);
> + u64 dt = ktime_to_ns(p->time);
> + int integer, decimal;
> +
> + if (dt) {
> + integer = div64_u64(busy, dt);
> + busy -= integer * dt;
> + decimal = div64_u64(100 * busy, dt);
> + } else {
> + integer = 0;
> + decimal = 0;
> + }
> +
> + GEM_BUG_ON(engine != p->engine);
> + pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
> + name, engine->name, p->count, integer, decimal,
> + div_u64(p->runtime, 1000 * 1000),
> + div_u64(ktime_to_ns(p->time), 1000 * 1000));
> + idx++;
> + }
> + }
> +
> + if (qos) {
> + pm_qos_remove_request(qos);
> + kfree(qos);
> + }
> + kfree(engines);
> + return err;
> +}
> +
> +int i915_request_perf_selftests(struct drm_i915_private *i915)
> +{
> + static const struct i915_subtest tests[] = {
> + SUBTEST(perf_parallel_engines),
> + };
> +
> + if (intel_gt_is_wedged(&i915->gt))
> + return 0;
> +
> + return i915_subtests(tests, i915);
> +}
>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2020-03-10 10:38 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-06 13:38 [Intel-gfx] [PATCH 01/17] drm/i915/selftests: Apply a heavy handed flush to i915_active Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 02/17] drm/i915/execlists: Enable timeslice on partial virtual engine dequeue Chris Wilson
2020-03-07 23:20 ` Sasha Levin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 03/17] drm/i915: Improve the start alignment of bonded pairs Chris Wilson
2020-03-10 9:59 ` Tvrtko Ursulin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 04/17] drm/i915: Tweak scheduler's kick_submission() Chris Wilson
2020-03-10 10:07 ` Tvrtko Ursulin
2020-03-10 11:00 ` Chris Wilson
2020-03-10 11:47 ` Tvrtko Ursulin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 05/17] drm/i915: Wrap i915_active in a simple kreffed struct Chris Wilson
2020-03-06 14:44 ` Mika Kuoppala
2020-03-06 13:38 ` [Intel-gfx] [PATCH 06/17] drm/i915: Extend i915_request_await_active to use all timelines Chris Wilson
2020-03-10 10:18 ` Tvrtko Ursulin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 07/17] drm/i915/perf: Schedule oa_config after modifying the contexts Chris Wilson
2020-03-06 14:20 ` Lionel Landwerlin
2020-03-10 11:17 ` Chris Wilson
2020-03-10 12:01 ` Lionel Landwerlin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
2020-03-10 10:38 ` Tvrtko Ursulin [this message]
2020-03-10 11:09 ` Chris Wilson
2020-03-10 11:58 ` Tvrtko Ursulin
2020-03-10 12:06 ` Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 09/17] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 10/17] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 11/17] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 12/17] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 13/17] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 14/17] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 15/17] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 16/17] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 17/17] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-03-06 14:35 ` [Intel-gfx] [PATCH 01/17] drm/i915/selftests: Apply a heavy handed flush to i915_active Mika Kuoppala
2020-03-06 21:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/17] " Patchwork
2020-03-06 21:33 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-03-06 21:59 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e5360a24-91e9-a873-5a9b-5b6e5cca59ac@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).