All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf
Date: Tue, 10 Mar 2020 10:38:21 +0000	[thread overview]
Message-ID: <e5360a24-91e9-a873-5a9b-5b6e5cca59ac@linux.intel.com> (raw)
In-Reply-To: <20200306133852.3420322-8-chris@chris-wilson.co.uk>


On 06/03/2020 13:38, Chris Wilson wrote:
> Under ideal circumstances, the driver should be able to keep the GPU
> fully saturated with work. Measure how close to ideal we get under the
> harshest of conditions with no user payload.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
>   drivers/gpu/drm/i915/selftests/i915_request.c | 285 +++++++++++++++++-
>   2 files changed, 285 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> index 3bf7f53e9924..d8da142985eb 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
> @@ -16,5 +16,6 @@
>    * Tests are executed in order by igt/i915_selftest
>    */
>   selftest(engine_cs, intel_engine_cs_perf_selftests)
> +selftest(request, i915_request_perf_selftests)
>   selftest(blt, i915_gem_object_blt_perf_selftests)
>   selftest(region, intel_memory_region_perf_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index f89d9c42f1fa..d4c088cfe4e1 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/prime_numbers.h>
> +#include <linux/pm_qos.h>
>   
>   #include "gem/i915_gem_pm.h"
>   #include "gem/selftests/mock_context.h"
> @@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg)
>   		struct igt_live_test t;
>   		unsigned int idx;
>   
> -		snprintf(name, sizeof(name), "%pS", fn);
> +		snprintf(name, sizeof(name), "%ps", *fn);
>   		err = igt_live_test_begin(&t, i915, __func__, name);
>   		if (err)
>   			break;
> @@ -1470,3 +1471,285 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
>   
>   	return i915_subtests(tests, i915);
>   }
> +
> +struct perf_parallel {
> +	struct intel_engine_cs *engine;
> +	unsigned long count;
> +	ktime_t time;
> +	ktime_t busy;
> +	u64 runtime;
> +};
> +
> +static int switch_to_kernel_sync(struct intel_context *ce, int err)
> +{
> +	struct i915_request *rq;
> +	struct dma_fence *fence;
> +
> +	rq = intel_engine_create_kernel_request(ce->engine);
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
> +
> +	fence = i915_active_fence_get(&ce->timeline->last_request);
> +	if (fence) {
> +		i915_request_await_dma_fence(rq, fence);
> +		dma_fence_put(fence);
> +	}
> +
> +	rq = i915_request_get(rq);
> +	i915_request_add(rq);
> +	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
> +		err = -ETIME;
> +	i915_request_put(rq);
> +
> +	while (!err && !intel_engine_is_idle(ce->engine))
> +		intel_engine_flush_submission(ce->engine);
> +
> +	return err;
> +}
> +
> +static int perf_sync(void *arg)
> +{
> +	struct perf_parallel *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	bool busy;
> +	int err = 0;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	p->time = ktime_get();
> +	count = 0;
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_get(rq);
> +		i915_request_add(rq);
> +
> +		err = 0;
> +		if (i915_request_wait(rq, 0, HZ / 5) < 0)
> +			err = -ETIME;
> +		i915_request_put(rq);
> +		if (err)
> +			break;
> +
> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +static int perf_many(void *arg)
> +{
> +	struct perf_parallel *p = arg;
> +	struct intel_engine_cs *engine = p->engine;
> +	struct intel_context *ce;
> +	IGT_TIMEOUT(end_time);
> +	unsigned long count;
> +	int err = 0;
> +	bool busy;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return PTR_ERR(ce);
> +
> +	err = intel_context_pin(ce);
> +	if (err) {
> +		intel_context_put(ce);
> +		return err;
> +	}
> +
> +	busy = false;
> +	if (intel_engine_supports_stats(engine) &&
> +	    !intel_enable_engine_stats(engine)) {
> +		p->busy = intel_engine_get_busy_time(engine);
> +		busy = true;
> +	}
> +
> +	count = 0;
> +	p->time = ktime_get();
> +	do {
> +		struct i915_request *rq;
> +
> +		rq = i915_request_create(ce);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		i915_request_add(rq);

Any concerns on ring size here and maybe managing the wait explicitly?

> +		count++;
> +	} while (!__igt_timeout(end_time, NULL));
> +	p->time = ktime_sub(ktime_get(), p->time);
> +
> +	if (busy) {
> +		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
> +				    p->busy);
> +		intel_disable_engine_stats(engine);
> +	}
> +
> +	err = switch_to_kernel_sync(ce, err);
> +	p->runtime = intel_context_get_total_runtime_ns(ce);
> +	p->count = count;
> +
> +	intel_context_unpin(ce);
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +static int perf_parallel_engines(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	static int (* const func[])(void *arg) = {
> +		perf_sync,
> +		perf_many,
> +		NULL,
> +	};
> +	const unsigned int nengines = num_uabi_engines(i915);
> +	struct intel_engine_cs *engine;
> +	int (* const *fn)(void *arg);
> +	struct pm_qos_request *qos;
> +	struct {
> +		struct perf_parallel p;
> +		struct task_struct *tsk;
> +	} *engines;
> +	int err = 0;
> +
> +	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
> +	if (!engines)
> +		return -ENOMEM;
> +
> +	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
> +	if (qos)
> +		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
> +
> +	for (fn = func; *fn; fn++) {
> +		char name[KSYM_NAME_LEN];
> +		struct igt_live_test t;
> +		unsigned int idx;
> +
> +		snprintf(name, sizeof(name), "%ps", *fn);

Is this any better than just storing the name in local static array?

> +		err = igt_live_test_begin(&t, i915, __func__, name);
> +		if (err)
> +			break;
> +
> +		atomic_set(&i915->selftest.counter, nengines);
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {

For a pure driver overhead test I would suggest this to be a gt live test.

> +			intel_engine_pm_get(engine);
> +
> +			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
> +			engines[idx].p.engine = engine;
> +
> +			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
> +						       "igt:%s", engine->name);

Test will get affected by the host CPU core count. How about we only 
measure num_cpu engines? Might be even more important with discrete.

> +			if (IS_ERR(engines[idx].tsk)) {
> +				err = PTR_ERR(engines[idx].tsk);
> +				intel_engine_pm_put(engine);
> +				break;
> +			}
> +			get_task_struct(engines[idx++].tsk);
> +		}
> +
> +		yield(); /* start all threads before we kthread_stop() */
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			int status;
> +
> +			if (IS_ERR(engines[idx].tsk))
> +				break;
> +
> +			status = kthread_stop(engines[idx].tsk);
> +			if (status && !err)
> +				err = status;
> +
> +			intel_engine_pm_put(engine);
> +			put_task_struct(engines[idx++].tsk);
> +		}
> +
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		if (err)
> +			break;
> +
> +		idx = 0;
> +		for_each_uabi_engine(engine, i915) {
> +			struct perf_parallel *p = &engines[idx].p;
> +			u64 busy = 100 * ktime_to_ns(p->busy);
> +			u64 dt = ktime_to_ns(p->time);
> +			int integer, decimal;
> +
> +			if (dt) {
> +				integer = div64_u64(busy, dt);
> +				busy -= integer * dt;
> +				decimal = div64_u64(100 * busy, dt);
> +			} else {
> +				integer = 0;
> +				decimal = 0;
> +			}
> +
> +			GEM_BUG_ON(engine != p->engine);
> +			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
> +				name, engine->name, p->count, integer, decimal,
> +				div_u64(p->runtime, 1000 * 1000),
> +				div_u64(ktime_to_ns(p->time), 1000 * 1000));
> +			idx++;
> +		}
> +	}
> +
> +	if (qos) {
> +		pm_qos_remove_request(qos);
> +		kfree(qos);
> +	}
> +	kfree(engines);
> +	return err;
> +}
> +
> +int i915_request_perf_selftests(struct drm_i915_private *i915)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(perf_parallel_engines),
> +	};
> +
> +	if (intel_gt_is_wedged(&i915->gt))
> +		return 0;
> +
> +	return i915_subtests(tests, i915);
> +}
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2020-03-10 10:38 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-06 13:38 [Intel-gfx] [PATCH 01/17] drm/i915/selftests: Apply a heavy handed flush to i915_active Chris Wilson
2020-03-06 13:38 ` [PATCH 02/17] drm/i915/execlists: Enable timeslice on partial virtual engine dequeue Chris Wilson
2020-03-06 13:38   ` [Intel-gfx] " Chris Wilson
2020-03-07 23:20   ` Sasha Levin
2020-03-07 23:20     ` [Intel-gfx] " Sasha Levin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 03/17] drm/i915: Improve the start alignment of bonded pairs Chris Wilson
2020-03-10  9:59   ` Tvrtko Ursulin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 04/17] drm/i915: Tweak scheduler's kick_submission() Chris Wilson
2020-03-10 10:07   ` Tvrtko Ursulin
2020-03-10 11:00     ` Chris Wilson
2020-03-10 11:47       ` Tvrtko Ursulin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 05/17] drm/i915: Wrap i915_active in a simple kreffed struct Chris Wilson
2020-03-06 14:44   ` Mika Kuoppala
2020-03-06 13:38 ` [Intel-gfx] [PATCH 06/17] drm/i915: Extend i915_request_await_active to use all timelines Chris Wilson
2020-03-10 10:18   ` Tvrtko Ursulin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 07/17] drm/i915/perf: Schedule oa_config after modifying the contexts Chris Wilson
2020-03-06 14:20   ` Lionel Landwerlin
2020-03-10 11:17   ` Chris Wilson
2020-03-10 12:01     ` Lionel Landwerlin
2020-03-06 13:38 ` [Intel-gfx] [PATCH 08/17] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
2020-03-10 10:38   ` Tvrtko Ursulin [this message]
2020-03-10 11:09     ` Chris Wilson
2020-03-10 11:58       ` Tvrtko Ursulin
2020-03-10 12:06         ` Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 09/17] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 10/17] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 11/17] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 12/17] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 13/17] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 14/17] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 15/17] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 16/17] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
2020-03-06 13:38 ` [Intel-gfx] [PATCH 17/17] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-03-06 14:35 ` [Intel-gfx] [PATCH 01/17] drm/i915/selftests: Apply a heavy handed flush to i915_active Mika Kuoppala
2020-03-06 21:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/17] " Patchwork
2020-03-06 21:33 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-03-06 21:59 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e5360a24-91e9-a873-5a9b-5b6e5cca59ac@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.