All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf
@ 2020-04-03  9:12 Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
                   ` (11 more replies)
  0 siblings, 12 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 590 +++++++++++++++++-
 2 files changed, 590 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1dab0360f76a..3cf0599cec4b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%ps", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1476,3 +1477,590 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+/*
+ * Submit a kernel request on ce->engine, ordered after the last request on
+ * ce's timeline, then wait for it and for the engine to report idle.
+ *
+ * @ce: context whose timeline's last request is awaited
+ * @err: error already recorded by the caller
+ *
+ * Returns PTR_ERR() if the kernel request cannot be created (the incoming
+ * @err is discarded in that case). Otherwise returns @err, replaced by
+ * -ETIME only if @err was zero and the wait on the kernel request failed.
+ */
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* Order the kernel request after whatever was last queued on ce */
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	/* Take our own reference for the wait; dropped by the put below */
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME; /* never overwrite an earlier error */
+	i915_request_put(rq);
+
+	/* Only spin for idle if nothing has gone wrong so far */
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+/* Per-engine measurements gathered by the request perf selftests. */
+struct perf_stats {
+	struct intel_engine_cs *engine; /* engine being measured */
+	unsigned long count; /* requests submitted (set by the p_*() threads) */
+	ktime_t time; /* start timestamp, then elapsed walltime */
+	ktime_t busy; /* engine busy-time sample, then the busy-time delta */
+	u64 runtime; /* context runtime in ns, from intel_context_get_total_runtime_ns() */
+};
+
+/* Shared state handed to the single-threaded s_*() submission loops. */
+struct perf_series {
+	struct drm_i915_private *i915; /* device under test */
+	unsigned int nengines; /* number of entries in ce[] */
+	struct intel_context *ce[]; /* one context per engine, filled by perf_series_engines() */
+};
+
+/*
+ * Fully synchronous submission from one thread: create a request on each
+ * context in turn (round-robin over ps->ce[]) and wait for it to complete
+ * before creating the next, until the timeout check ends the loop.
+ *
+ * @arg: struct perf_series describing the contexts to cycle through
+ *
+ * Returns 0, the PTR_ERR() of a failed request creation, or -ETIME if a
+ * request wait fails.
+ */
+static int s_sync0(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		/* Keep a reference across the add so we can wait on rq */
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		/* Advance to the next context, wrapping back to the first */
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return err;
+}
+
+/*
+ * Submission from one thread that stays one request ahead: after adding a
+ * new request, wait for the previously added one rather than the new one.
+ * Contexts are used round-robin over ps->ce[] until the timeout check ends
+ * the loop.
+ *
+ * @arg: struct perf_series describing the contexts to cycle through
+ *
+ * Returns 0, the PTR_ERR() of a failed request creation, or -ETIME if a
+ * request wait fails.
+ */
+static int s_sync1(void *arg)
+{
+	struct perf_series *ps = arg;
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+	int err = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		/* prev is NULL on the first pass, so there is nothing to wait on */
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq; /* the reference taken above now belongs to prev */
+		if (err)
+			break;
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+	/* Drop the reference on the final request; it is not waited on here */
+	i915_request_put(prev);
+
+	return err;
+}
+
+/*
+ * Unthrottled submission from one thread: create and add requests
+ * round-robin over ps->ce[] without waiting on any of them, until the
+ * timeout check ends the loop.
+ *
+ * @arg: struct perf_series describing the contexts to cycle through
+ *
+ * Returns 0, or the PTR_ERR() of a failed request creation.
+ */
+static int s_many(void *arg)
+{
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);
+	unsigned int idx = 0;
+
+	GEM_BUG_ON(!ps->nengines);
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ps->ce[idx]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		/* No reference is kept: the request is never waited on here */
+		i915_request_add(rq);
+
+		if (++idx == ps->nengines)
+			idx = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+/*
+ * Measure request throughput when a single thread feeds every engine.
+ *
+ * One context is created and pinned per uabi engine. Each submission
+ * pattern in func[] (s_sync0, s_sync1, s_many) is then run once over those
+ * contexts, and per-engine busyness, context runtime and walltime are
+ * reported with pr_info().
+ *
+ * @arg: the struct drm_i915_private under test
+ *
+ * Returns 0 on success or a negative error code: -ENOMEM on allocation
+ * failure, the error from context creation/pinning, the error from the
+ * submission pattern or switch_to_kernel_sync(), or -EIO if
+ * igt_live_test_end() reports a failure.
+ */
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	/*
+	 * Allocate ps before registering the PM QoS request, so that the
+	 * early return below cannot leak qos or leave the request installed.
+	 */
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	/* Best effort: the test still runs if qos cannot be allocated */
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			/* Report the failure instead of returning success */
+			err = PTR_ERR(ce);
+			goto out;
+		}
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		/* Take the starting samples for every engine */
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			/*
+			 * The sample is stored offset by one so that a
+			 * non-zero p->busy marks engine stats as enabled.
+			 */
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			/* Start negative; adding the end sample yields the delta */
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		/* Take the end samples and report, even if the run failed */
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				/* Undo the +1 marker applied at the start */
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			/* busy as a percentage of walltime, to two decimals */
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, ce->timeline->seqno,
+				integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	/* ce[] is zero-initialised, so stop at the first unused slot */
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(stats);
+	return err;
+}
+
+/*
+ * Per-engine worker (run as a kthread by perf_parallel_engines()): fully
+ * synchronous submission on a private context, waiting for each request
+ * before creating the next, until the timeout check ends the loop.
+ *
+ * @arg: struct perf_stats naming the engine; count, time, busy and runtime
+ *       are filled in before returning
+ *
+ * Returns 0 or a negative error code from context creation/pinning,
+ * request creation, a failed wait (-ETIME) or switch_to_kernel_sync().
+ */
+static int p_sync0(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Sample busy time only if engine stats could be enabled */
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		/* Keep a reference across the add so we can wait on rq */
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++; /* only completed requests are counted */
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+/*
+ * Per-engine worker (run as a kthread by perf_parallel_engines()):
+ * submission on a private context that stays one request ahead, waiting on
+ * the previously added request after adding each new one.
+ *
+ * @arg: struct perf_stats naming the engine; count, time, busy and runtime
+ *       are filled in before returning
+ *
+ * Returns 0 or a negative error code from context creation/pinning,
+ * request creation, a failed wait (-ETIME) or switch_to_kernel_sync().
+ */
+static int p_sync1(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct i915_request *prev = NULL;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Sample busy time only if engine stats could be enabled */
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		/* prev is NULL on the first pass, so there is nothing to wait on */
+		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(prev);
+		prev = rq; /* the reference taken above now belongs to prev */
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	/* Drop the reference on the final request; it is not waited on here */
+	i915_request_put(prev);
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+/*
+ * Per-engine worker (run as a kthread by perf_parallel_engines()):
+ * unthrottled submission on a private context, adding requests without
+ * waiting on any of them until the timeout check ends the loop.
+ *
+ * @arg: struct perf_stats naming the engine; count, time, busy and runtime
+ *       are filled in before returning
+ *
+ * Returns 0 or a negative error code from context creation/pinning,
+ * request creation or switch_to_kernel_sync().
+ */
+static int p_many(void *arg)
+{
+	struct perf_stats *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Sample busy time only if engine stats could be enabled */
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		/* No reference is kept: the request is never waited on here */
+		i915_request_add(rq);
+		count++; /* counts submissions, not completions */
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+/*
+ * Measure request throughput with one submitting thread per engine.
+ *
+ * For each submission pattern in func[] (p_sync0, p_sync1, p_many), a
+ * kthread is started for every uabi engine, all threads are then stopped
+ * and their results collected, and per-engine count, busyness, context
+ * runtime and walltime are reported with pr_info().
+ *
+ * @arg: the struct drm_i915_private under test
+ *
+ * Returns 0 on success or a negative error code: -ENOMEM, the error from
+ * igt_live_test_begin() or kthread_run(), the first non-zero thread exit
+ * status, or -EIO if igt_live_test_end() reports a failure.
+ */
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	/* Best effort: the test still runs if qos cannot be allocated */
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		/* Start one worker thread per engine */
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			/* Hold a task reference until after kthread_stop() */
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		/* Stop the workers, keeping the first error seen */
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			/* Stop at the slot where kthread_run() failed */
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		/* Report the results; only reached when every worker succeeded */
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			/* busy as a percentage of walltime, to two decimals */
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(engines);
+	return err;
+}
+
+/*
+ * Entry point for the request perf selftests: runs perf_series_engines and
+ * perf_parallel_engines via i915_subtests().
+ *
+ * Returns 0 without running anything if the GT is already wedged,
+ * otherwise the result of i915_subtests().
+ */
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-07  9:07   ` Tvrtko Ursulin
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 03/10] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke, Chris Wilson

If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  6 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c       | 13 ++++++-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 40 +++++++++++++++++---
 drivers/gpu/drm/i915/gt/selftest_lrc.c       | 15 +++-----
 drivers/gpu/drm/i915/i915_reg.h              |  1 +
 6 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 843cb6f2f696..04995040407d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1313,6 +1313,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 
 	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+	if (HAS_EXECLISTS(dev_priv)) {
+		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+	}
 	drm_printf(m, "\tRING_START: 0x%08x\n",
 		   ENGINE_READ(engine, RING_START));
 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 80cdde712842..ac283ab5d89c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
 	 */
 	struct i915_priolist default_priolist;
 
+	/**
+	 * @yield: CCID at the time of the last semaphore-wait interrupt.
+	 *
+	 * Instead of leaving a semaphore busy-spinning on an engine, we would
+	 * like to switch to another ready context, i.e. yielding the semaphore
+	 * timeslice.
+	 */
+	u32 yield;
+
 	/**
 	 * @error_interrupt: CS Master EIR
 	 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..875bd0392ffc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 		}
 	}
 
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		tasklet = true;
 
@@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	struct intel_uncore *uncore = gt->uncore;
 	const u32 dmask = irqs << 16 | irqs;
 	const u32 smask = irqs << 16;
@@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	const u32 gt_interrupts[] = {
 		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
 		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f028114714cd..55a58709590a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1768,7 +1768,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+	       const struct i915_request *rq)
 {
 	int hint;
 
@@ -1782,6 +1783,31 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+		const struct i915_request *rq)
+{
+	/*
+	 * Once bitten, forever smitten!
+	 *
+	 * If the active context ever busy-waited on a semaphore,
+	 * it will be treated as a hog until the end of its timeslice.
+	 * The HW only sends an interrupt on the first miss, and we
+	 * do not know if that semaphore has been signaled, or even if
+	 * it is now stuck on another semaphore. Play safe, yield if it
+	 * might be stuck -- it will be given a fresh timeslice in
+	 * the near future.
+	 *
+	 * The check compares the upper 32 bits of the request's context
+	 * descriptor against the value recorded in el->yield.
+	 */
+	return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
+}
+
+/*
+ * A timeslice counts as expired when the execlists timer has expired or
+ * when timeslice_yield() says @rq's context should yield.
+ */
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+		  const struct i915_request *rq)
+{
+	return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1797,8 +1823,7 @@ timeslice(const struct intel_engine_cs *engine)
 	return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
 	const struct intel_engine_execlists *execlists = &engine->execlists;
 	const struct i915_request *rq = *execlists->active;
@@ -1989,18 +2014,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   timer_expired(&engine->execlists.timer)) {
+			   timeslice_expired(execlists, last)) {
 			if (i915_request_completed(last)) {
 				tasklet_hi_schedule(&execlists->tasklet);
 				return;
 			}
 
 			ENGINE_TRACE(engine,
-				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
+				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
 				     last->fence.context,
 				     last->fence.seqno,
 				     last->sched.attr.priority,
-				     execlists->queue_priority_hint);
+				     execlists->queue_priority_hint,
+				     yesno(timeslice_yield(execlists, last)));
 
 			ring_set_paused(engine, 1);
 			defer_active(engine);
@@ -2261,6 +2287,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 		clear_ports(port + 1, last_port - port);
 
+		WRITE_ONCE(execlists->yield, -1);
 		execlists_submit_ports(engine);
 		set_preempt_timeout(engine, *active);
 	} else {
@@ -4563,6 +4590,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 985d4041d929..8e8b0c0ddc76 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1071,15 +1071,12 @@ static int live_timeslice_rewind(void *arg)
 		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
 
 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
-		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
-		GEM_BUG_ON(!i915_request_is_active(rq[A2]));
-		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
-		/* Wait for the timeslice to kick in */
-		del_timer(&engine->execlists.timer);
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-		intel_engine_flush_submission(engine);
-
+		if (i915_request_is_active(rq[A2])) {
+			/* Wait for the timeslice to kick in */
+			del_timer(&engine->execlists.timer);
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+			intel_engine_flush_submission(engine);
+		}
 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 17484345cb80..f402a9f78969 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 03/10] dma-buf: Prettify typecasts for dma-fence-chain
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Inside dma-fence-chain, we use a cmpxchg on an RCU-protected pointer. To
avoid the sparse warning for using the RCU pointer directly, we have to
cast away the __rcu annotation. However, we don't need to use void*
everywhere and can stick to the dma_fence*.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/dma-buf/dma-fence-chain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 44a741677d25..3d123502ff12 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -62,7 +62,8 @@ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
 			replacement = NULL;
 		}
 
-		tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);
+		tmp = cmpxchg((struct dma_fence __force **)&chain->prev,
+			      prev, replacement);
 		if (tmp == prev)
 			dma_fence_put(tmp);
 		else
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 03/10] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-08 19:46   ` Venkata Sandeep Dhanalakota
  2020-04-08 20:00   ` Lionel Landwerlin
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
                   ` (8 subsequent siblings)
  11 siblings, 2 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Whenever we walk along the dma-fence-chain, we prune signaled links to
keep the chain nice and tidy. This leads to situations where we can
prune a link and report the earlier fence as the target seqno --
violating our own consistency checks that the seqno is not more advanced
than the last element in a dma-fence-chain.

Report a NULL fence and success if the seqno has already been signaled.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence-chain.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 3d123502ff12..c435bbba851c 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
 		return -EINVAL;
 
 	dma_fence_chain_for_each(*pfence, &chain->base) {
+		if ((*pfence)->seqno < seqno) { /* already signaled */
+			dma_fence_put(*pfence);
+			*pfence = NULL;
+			break;
+		}
+
 		if ((*pfence)->context != chain->base.context ||
 		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
 			break;
@@ -222,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops);
  * @chain: the chain node to initialize
  * @prev: the previous fence
  * @fence: the current fence
+ * @seqno: the sequence number (syncpt) of the fence within the chain
  *
  * Initialize a new chain node and either start a new chain or add the node to
  * the existing chain of the previous fence.
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (2 preceding siblings ...)
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-08 19:49   ` Venkata Sandeep Dhanalakota
  2020-04-10 16:11   ` Lionel Landwerlin
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
                   ` (7 subsequent siblings)
  11 siblings, 2 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

A few very simple testcases to exercise the dma-fence-chain API.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/Makefile             |   3 +-
 drivers/dma-buf/selftests.h          |   1 +
 drivers/dma-buf/st-dma-fence-chain.c | 713 +++++++++++++++++++++++++++
 3 files changed, 716 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma-buf/st-dma-fence-chain.c

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 9c190026bfab..995e05f609ff 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_UDMABUF)		+= udmabuf.o
 
 dmabuf_selftests-y := \
 	selftest.o \
-	st-dma-fence.o
+	st-dma-fence.o \
+	st-dma-fence-chain.o
 
 obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
index 5320386f02e5..55918ef9adab 100644
--- a/drivers/dma-buf/selftests.h
+++ b/drivers/dma-buf/selftests.h
@@ -11,3 +11,4 @@
  */
 selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
 selftest(dma_fence, dma_fence)
+selftest(dma_fence_chain, dma_fence_chain)
diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
new file mode 100644
index 000000000000..bd08ba67b03b
--- /dev/null
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+
+#include "selftest.h"
+
+static struct kmem_cache *slab_fences;
+
+/*
+ * A mock fence is a dma_fence bundled with the spinlock handed to
+ * dma_fence_init(). The struct is declared in the return type of this
+ * helper, which maps a pointer to the embedded base fence back to its
+ * containing mock_fence.
+ */
+static inline struct mock_fence {
+	struct dma_fence base;
+	spinlock_t lock;
+} *to_mock_fence(struct dma_fence *f) {
+	return container_of(f, struct mock_fence, base);
+}
+
+/* Used for both the driver name and the timeline name: always "mock". */
+static const char *mock_name(struct dma_fence *f)
+{
+	return "mock";
+}
+
+/* .release hook: hand the mock fence back to the slab_fences cache. */
+static void mock_fence_release(struct dma_fence *f)
+{
+	kmem_cache_free(slab_fences, to_mock_fence(f));
+}
+
+/* Fence ops for mock fences: only the two names and release are set. */
+static const struct dma_fence_ops mock_ops = {
+	.get_driver_name = mock_name,
+	.get_timeline_name = mock_name,
+	.release = mock_fence_release,
+};
+
+/*
+ * Allocate a mock fence from slab_fences and initialise it with its own
+ * lock, context 0 and seqno 0.
+ *
+ * Returns the embedded dma_fence, or NULL if the allocation fails.
+ */
+static struct dma_fence *mock_fence(void)
+{
+	struct mock_fence *f;
+
+	f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	spin_lock_init(&f->lock);
+	dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+
+	return &f->base;
+}
+
+/*
+ * A mock chain wraps a dma_fence_chain. As with mock_fence, the struct is
+ * declared in the return type of the helper, which recovers it from the
+ * dma_fence embedded in the chain node (base.base).
+ */
+static inline struct mock_chain {
+	struct dma_fence_chain base;
+} *to_mock_chain(struct dma_fence *f) {
+	return container_of(f, struct mock_chain, base.base);
+}
+
+/*
+ * Create a chain node that links @fence behind @prev with the given
+ * @seqno. @prev is NULL for the first link of a chain.
+ *
+ * A new reference to @prev and to @fence is taken and passed to
+ * dma_fence_chain_init(); the caller's own references are untouched.
+ *
+ * Returns the node's dma_fence, or NULL if the allocation fails.
+ */
+static struct dma_fence *mock_chain(struct dma_fence *prev,
+				    struct dma_fence *fence,
+				    u64 seqno)
+{
+	struct mock_chain *f;
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	dma_fence_chain_init(&f->base,
+			     dma_fence_get(prev),
+			     dma_fence_get(fence),
+			     seqno);
+
+	return &f->base.base;
+}
+
+/*
+ * Smoke test: wrap one mock fence in a single-link chain, signal the
+ * fence and drop both references.
+ *
+ * Returns 0, or -ENOMEM if either allocation fails.
+ */
+static int sanitycheck(void *arg)
+{
+	struct dma_fence *f, *chain;
+	int err = 0;
+
+	f = mock_fence();
+	if (!f)
+		return -ENOMEM;
+
+	chain = mock_chain(NULL, f, 1);
+	if (!chain)
+		err = -ENOMEM;
+
+	dma_fence_signal(f);
+	dma_fence_put(f);
+
+	/* chain is NULL on the -ENOMEM path; relies on the put accepting NULL */
+	dma_fence_put(chain);
+
+	return err;
+}
+
+/*
+ * A linear chain as built by fence_chains_init(): chains[i] is the chain
+ * node created around fences[i], chain_length is the number of links and
+ * tail is the last node created.
+ */
+struct fence_chains {
+	unsigned int chain_length;
+	struct dma_fence **fences;
+	struct dma_fence **chains;
+
+	struct dma_fence *tail;
+};
+
+/* Dense numbering for fence_chains_init(): link i gets seqno i + 1. */
+static uint64_t seqno_inc(unsigned int i)
+{
+	return i + 1;
+}
+
+/*
+ * Build a chain of @count links in @fc. Each link is a fresh mock fence
+ * wrapped in a chain node whose seqno is @seqno_fn(index) and whose
+ * predecessor is the node created on the previous iteration.
+ *
+ * Returns 0 with fc->chain_length set to @count, or -ENOMEM after
+ * releasing whatever had been allocated so far.
+ */
+static int fence_chains_init(struct fence_chains *fc, unsigned int count,
+			     uint64_t (*seqno_fn)(unsigned int))
+{
+	unsigned int i;
+	int err = 0;
+
+	fc->chains = kvmalloc_array(count, sizeof(*fc->chains),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!fc->chains)
+		return -ENOMEM;
+
+	fc->fences = kvmalloc_array(count, sizeof(*fc->fences),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!fc->fences) {
+		err = -ENOMEM;
+		goto err_chains;
+	}
+
+	fc->tail = NULL;
+	for (i = 0; i < count; i++) {
+		fc->fences[i] = mock_fence();
+		if (!fc->fences[i]) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+
+		fc->chains[i] = mock_chain(fc->tail,
+					   fc->fences[i],
+					   seqno_fn(i));
+		if (!fc->chains[i]) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+
+		fc->tail = fc->chains[i];
+	}
+
+	fc->chain_length = i;
+	return 0;
+
+unwind:
+	/* Slots never filled are still NULL from the zeroed allocation */
+	for (i = 0; i < count; i++) {
+		dma_fence_put(fc->fences[i]);
+		dma_fence_put(fc->chains[i]);
+	}
+	kvfree(fc->fences);
+err_chains:
+	kvfree(fc->chains);
+	return err;
+}
+
+/*
+ * Tear down a chain built by fence_chains_init(): signal and release
+ * every mock fence first, then release the chain nodes and free both
+ * arrays.
+ */
+static void fence_chains_fini(struct fence_chains *fc)
+{
+	unsigned int i;
+
+	for (i = 0; i < fc->chain_length; i++) {
+		dma_fence_signal(fc->fences[i]);
+		dma_fence_put(fc->fences[i]);
+	}
+	kvfree(fc->fences);
+
+	for (i = 0; i < fc->chain_length; i++)
+		dma_fence_put(fc->chains[i]);
+	kvfree(fc->chains);
+}
+
+/*
+ * Exercise dma_fence_chain_find_seqno() on a 64-link chain with dense
+ * seqnos (link i carries seqno i + 1). Nothing is signaled while the
+ * lookups run.
+ *
+ * Returns 0 on success, the error from the lookup, or -EINVAL when a
+ * lookup yields an unexpected result.
+ */
+static int find_seqno(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc);
+	if (err)
+		return err;
+
+	/* Looking up seqno 0 from the tail must not report an error */
+	fence = dma_fence_get(fc.tail);
+	err = dma_fence_chain_find_seqno(&fence, 0);
+	dma_fence_put(fence);
+	if (err) {
+		pr_err("Reported %d for find_seqno(0)!\n", err);
+		goto err;
+	}
+
+	for (i = 0; i < fc.chain_length; i++) {
+		/* From the tail, seqno i + 1 must resolve to link i */
+		fence = dma_fence_get(fc.tail);
+		err = dma_fence_chain_find_seqno(&fence, i + 1);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Reported %d for find_seqno(%d:%d)!\n",
+			       err, fc.chain_length + 1, i + 1);
+			goto err;
+		}
+		if (fence != fc.chains[i]) {
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
+			       fc.chain_length + 1, i + 1);
+			err = -EINVAL;
+			goto err;
+		}
+
+		/* Starting at link i, its own seqno must resolve to itself */
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, i + 1);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Error reported for finding self\n");
+			goto err;
+		}
+		if (fence != fc.chains[i]) {
+			pr_err("Incorrect fence reported by find self\n");
+			err = -EINVAL;
+			goto err;
+		}
+
+		/* A seqno beyond link i must be rejected when starting there */
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, i + 2);
+		dma_fence_put(fence);
+		if (!err) {
+			pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",
+			       i + 1, i + 2);
+			err = -EINVAL;
+			goto err;
+		}
+
+		/* The previous seqno must resolve to the previous link */
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, i);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Error reported for previous fence!\n");
+			goto err;
+		}
+		if (i > 0 && fence != fc.chains[i - 1]) {
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
+			       i + 1, i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * On a 2-link chain whose first fence has been signaled, looking up
+ * seqno 1 from the tail must succeed and yield either NULL or the first
+ * chain node. Any other result is reported and turned into -EINVAL.
+ */
+static int find_signaled(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+
+	err = fence_chains_init(&fc, 2, seqno_inc);
+	if (err)
+		return err;
+
+	dma_fence_signal(fc.fences[0]);
+
+	fence = dma_fence_get(fc.tail);
+	err = dma_fence_chain_find_seqno(&fence, 1);
+	dma_fence_put(fence);
+	if (err) {
+		pr_err("Reported %d for find_seqno()!\n", err);
+		goto err;
+	}
+
+	if (fence && fence != fc.chains[0]) {
+		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:1\n",
+		       fence->seqno);
+
+		/* Extra diagnostic only: the test has already failed */
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, 1);
+		dma_fence_put(fence);
+		if (err)
+			pr_err("Reported %d for finding self!\n", err);
+
+		err = -EINVAL;
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * On a 3-link chain where only the middle fence has been signaled,
+ * looking up seqno 2 from the tail must succeed and yield either NULL or
+ * the middle chain node. Any other result is reported and turned into
+ * -EINVAL.
+ */
+static int find_out_of_order(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+
+	err = fence_chains_init(&fc, 3, seqno_inc);
+	if (err)
+		return err;
+
+	dma_fence_signal(fc.fences[1]);
+
+	fence = dma_fence_get(fc.tail);
+	err = dma_fence_chain_find_seqno(&fence, 2);
+	dma_fence_put(fence);
+	if (err) {
+		pr_err("Reported %d for find_seqno()!\n", err);
+		goto err;
+	}
+
+	if (fence && fence != fc.chains[1]) {
+		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:2\n",
+		       fence->seqno);
+
+		/* Extra diagnostic only: the test has already failed */
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, 2);
+		dma_fence_put(fence);
+		if (err)
+			pr_err("Reported %d for finding self!\n", err);
+
+		err = -EINVAL;
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/* Sparse numbering: link i gets seqno 2 * i + 2, so odd seqnos are unused. */
+static uint64_t seqno_inc2(unsigned int i)
+{
+	return 2 * (i + 1);
+}
+
+/*
+ * Look up seqnos on a 64-link chain numbered by seqno_inc2(), i.e. with
+ * an unused seqno in front of every link. Searching from the tail for
+ * the unused seqno just below link i must land on link i, and link i
+ * must then find itself by its own seqno.
+ *
+ * Returns 0 on success, the lookup error, or -EINVAL on a wrong result.
+ */
+static int find_gap(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *found;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc2);
+	if (err)
+		return err;
+
+	for (i = 0; i < fc.chain_length; i++) {
+		const int gap = 2 * i + 1;
+		const int self = 2 * i + 2;
+
+		/* The seqno in the gap resolves to the link just above it */
+		found = dma_fence_get(fc.tail);
+		err = dma_fence_chain_find_seqno(&found, gap);
+		dma_fence_put(found);
+		if (err) {
+			pr_err("Reported %d for find_seqno(%d:%d)!\n",
+			       err, fc.chain_length + 1, gap);
+			break;
+		}
+		if (found != fc.chains[i]) {
+			pr_err("Incorrect fence.seqno:%lld reported by find_seqno(%d:%d)\n",
+			       found->seqno,
+			       fc.chain_length + 1,
+			       gap);
+			err = -EINVAL;
+			break;
+		}
+
+		/* ... and that link resolves to itself by its own seqno */
+		dma_fence_get(found);
+		err = dma_fence_chain_find_seqno(&found, self);
+		dma_fence_put(found);
+		if (err) {
+			pr_err("Error reported for finding self\n");
+			break;
+		}
+		if (found != fc.chains[i]) {
+			pr_err("Incorrect fence reported by find self\n");
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * Shared state for the find_race test: the chain under test and the
+ * count of worker threads that have not yet left __find_race().
+ */
+struct find_race {
+	struct fence_chains fc;
+	atomic_t children;
+};
+
+/*
+ * Worker thread for find_race(). Until asked to stop, or until a check
+ * fails, each iteration looks up a random seqno from the tail, validates
+ * the result and then signals a randomly chosen fence of the chain, so
+ * lookups race against signaling done by the other workers.
+ *
+ * Returns 0, the lookup error, or -EINVAL if a fence with a seqno lower
+ * than the one requested was reported.
+ */
+static int __find_race(void *arg)
+{
+	struct find_race *data = arg;
+	int err = 0;
+
+	while (!kthread_should_stop()) {
+		struct dma_fence *fence = dma_fence_get(data->fc.tail);
+		int seqno;
+
+		seqno = prandom_u32_max(data->fc.chain_length) + 1;
+
+		err = dma_fence_chain_find_seqno(&fence, seqno);
+		if (err) {
+			pr_err("Failed to find fence seqno:%d\n",
+			       seqno);
+			dma_fence_put(fence);
+			break;
+		}
+		/* No fence returned: nothing further to validate this round */
+		if (!fence)
+			goto signal;
+
+		/* Repeat the lookup starting from the fence just found */
+		err = dma_fence_chain_find_seqno(&fence, seqno);
+		if (err) {
+			pr_err("Reported an invalid fence for find-self:%d\n",
+			       seqno);
+			dma_fence_put(fence);
+			break;
+		}
+
+		if (fence->seqno < seqno) {
+			pr_err("Reported an earlier fence.seqno:%lld for seqno:%d\n",
+			       fence->seqno, seqno);
+			err = -EINVAL;
+			dma_fence_put(fence);
+			break;
+		}
+
+		dma_fence_put(fence);
+
+signal:
+		seqno = prandom_u32_max(data->fc.chain_length - 1);
+		dma_fence_signal(data->fc.fences[seqno]);
+		cond_resched();
+	}
+
+	/* The last worker to leave wakes the waiter in find_race() */
+	if (atomic_dec_and_test(&data->children))
+		wake_up_var(&data->children);
+	return err;
+}
+
+/*
+ * Race lookups against signaling on a 64K-link chain using one
+ * __find_race() worker per online CPU.
+ *
+ * Returns 0 on success, -ENOMEM if the chain or the thread array cannot
+ * be allocated, or the first non-zero result collected from a worker.
+ */
+static int find_race(void *arg)
+{
+	struct find_race data;
+	int ncpus = num_online_cpus();
+	struct task_struct **threads;
+	unsigned long count;
+	int err;
+	int i;
+
+	err = fence_chains_init(&data.fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+	if (!threads) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	atomic_set(&data.children, 0);
+	for (i = 0; i < ncpus; i++) {
+		threads[i] = kthread_run(__find_race, &data, "dmabuf/%d", i);
+		if (IS_ERR(threads[i])) {
+			/* Carry on with the workers started so far; not an error */
+			ncpus = i;
+			break;
+		}
+		atomic_inc(&data.children);
+		get_task_struct(threads[i]);
+	}
+
+	/* Let the workers run for up to 5s, or until all of them have exited */
+	wait_var_event_timeout(&data.children,
+			       !atomic_read(&data.children),
+			       5 * HZ);
+
+	for (i = 0; i < ncpus; i++) {
+		int ret;
+
+		ret = kthread_stop(threads[i]);
+		if (ret && !err)
+			err = ret;
+		put_task_struct(threads[i]);
+	}
+	kfree(threads);
+
+	/* Informational: how many fences the workers managed to signal */
+	count = 0;
+	for (i = 0; i < data.fc.chain_length; i++)
+		if (dma_fence_is_signaled(data.fc.fences[i]))
+			count++;
+	pr_info("Completed %lu cycles\n", count);
+
+err:
+	fence_chains_fini(&data.fc);
+	return err;
+}
+
+/*
+ * Signal the fences of a 64-link chain from first to last. After each
+ * step the matching chain node must report signaled while the node after
+ * it must not.
+ *
+ * Returns 0 on success or -EINVAL on the first violated expectation.
+ */
+static int signal_forward(void *arg)
+{
+	struct fence_chains fc;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc);
+	if (err)
+		return err;
+
+	for (i = 0; i < fc.chain_length; i++) {
+		const int next = i + 1;
+
+		dma_fence_signal(fc.fences[i]);
+
+		if (!dma_fence_is_signaled(fc.chains[i])) {
+			pr_err("chain[%d] not signaled!\n", i);
+			err = -EINVAL;
+			break;
+		}
+
+		/* The following link still waits on its own, unsignaled fence */
+		if (next < fc.chain_length &&
+		    dma_fence_is_signaled(fc.chains[next])) {
+			pr_err("chain[%d] is signaled!\n", i);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * Signal the fences of a 64-link chain from last to first. While earlier
+ * fences are still pending, a chain node (other than the first) must not
+ * report signaled; once every fence has been signaled, all nodes must.
+ *
+ * Returns 0 on success or -EINVAL on the first violated expectation.
+ */
+static int signal_backward(void *arg)
+{
+	struct fence_chains fc;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc);
+	if (err)
+		return err;
+
+	for (i = fc.chain_length; i--; ) {
+		dma_fence_signal(fc.fences[i]);
+
+		/* Link 0 has no predecessor, so it is exempt from this check */
+		if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
+			pr_err("chain[%d] is signaled!\n", i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+	for (i = 0; i < fc.chain_length; i++) {
+		if (!dma_fence_is_signaled(fc.chains[i])) {
+			pr_err("chain[%d] was not signaled!\n", i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * Thread body for the wait_* tests: wait (non-interruptibly) on the tail
+ * of the chain. Any non-zero result from the wait is reported as -EIO.
+ */
+static int __wait_fence_chains(void *arg)
+{
+	struct fence_chains *chain = arg;
+
+	return dma_fence_wait(chain->tail, false) ? -EIO : 0;
+}
+
+/*
+ * Start a thread waiting on the tail of a 64K-link chain, then signal
+ * the fences from first to last and collect the waiter's result.
+ *
+ * Returns the chain setup error, the kthread_run() error, or whatever
+ * kthread_stop() reports for the waiter.
+ */
+static int wait_forward(void *arg)
+{
+	struct fence_chains fc;
+	struct task_struct *tsk;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+	if (IS_ERR(tsk)) {
+		err = PTR_ERR(tsk);
+		goto err;
+	}
+	get_task_struct(tsk);
+	/* Presumably lets the waiter start its wait before we signal */
+	yield_to(tsk, true);
+
+	for (i = 0; i < fc.chain_length; i++)
+		dma_fence_signal(fc.fences[i]);
+
+	err = kthread_stop(tsk);
+	put_task_struct(tsk);
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * Start a thread waiting on the tail of a 64K-link chain, then signal
+ * the fences from last to first and collect the waiter's result.
+ *
+ * Returns the chain setup error, the kthread_run() error, or whatever
+ * kthread_stop() reports for the waiter.
+ */
+static int wait_backward(void *arg)
+{
+	struct fence_chains fc;
+	struct task_struct *tsk;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+	if (IS_ERR(tsk)) {
+		err = PTR_ERR(tsk);
+		goto err;
+	}
+	get_task_struct(tsk);
+	/* Presumably lets the waiter start its wait before we signal */
+	yield_to(tsk, true);
+
+	for (i = fc.chain_length; i--; )
+		dma_fence_signal(fc.fences[i]);
+
+	err = kthread_stop(tsk);
+	put_task_struct(tsk);
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * Shuffle fc->fences[] in place. fc->chains[] is left untouched, so
+ * afterwards fences[i] is in general no longer the fence wrapped by
+ * chains[i]; walking fences[] in index order then visits the links in a
+ * random order.
+ */
+static void randomise_fences(struct fence_chains *fc)
+{
+	unsigned int count = fc->chain_length;
+
+	/*
+	 * Nothing to shuffle for 0 or 1 entries. For 0 this also stops the
+	 * pre-decrement below from wrapping around and indexing far outside
+	 * the array.
+	 */
+	if (count < 2)
+		return;
+
+	/* Fisher-Yates shuffle courtesy of Knuth */
+	while (--count) {
+		unsigned int swp;
+
+		swp = prandom_u32_max(count + 1);
+		if (swp == count)
+			continue;
+
+		swap(fc->fences[count], fc->fences[swp]);
+	}
+}
+
+/*
+ * Start a thread waiting on the tail of a 64K-link chain, then signal
+ * the fences in the shuffled order produced by randomise_fences() and
+ * collect the waiter's result.
+ *
+ * Returns the chain setup error, the kthread_run() error, or whatever
+ * kthread_stop() reports for the waiter.
+ */
+static int wait_random(void *arg)
+{
+	struct fence_chains fc;
+	struct task_struct *tsk;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	randomise_fences(&fc);
+
+	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+	if (IS_ERR(tsk)) {
+		err = PTR_ERR(tsk);
+		goto err;
+	}
+	get_task_struct(tsk);
+	/* Presumably lets the waiter start its wait before we signal */
+	yield_to(tsk, true);
+
+	for (i = 0; i < fc.chain_length; i++)
+		dma_fence_signal(fc.fences[i]);
+
+	err = kthread_stop(tsk);
+	put_task_struct(tsk);
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+/*
+ * Selftest entry point (registered in selftests.h): create the slab
+ * cache backing the mock fences, run every subtest in table order and
+ * destroy the cache again.
+ *
+ * Returns -ENOMEM if the cache cannot be created, otherwise the result
+ * of subtests().
+ */
+int dma_fence_chain(void)
+{
+	static const struct subtest tests[] = {
+		SUBTEST(sanitycheck),
+		SUBTEST(find_seqno),
+		SUBTEST(find_signaled),
+		SUBTEST(find_out_of_order),
+		SUBTEST(find_gap),
+		SUBTEST(find_race),
+		SUBTEST(signal_forward),
+		SUBTEST(signal_backward),
+		SUBTEST(wait_forward),
+		SUBTEST(wait_backward),
+		SUBTEST(wait_random),
+	};
+	int ret;
+
+	pr_info("sizeof(dma_fence_chain)=%zu\n",
+		sizeof(struct dma_fence_chain));
+
+	slab_fences = KMEM_CACHE(mock_fence,
+				 SLAB_TYPESAFE_BY_RCU |
+				 SLAB_HWCACHE_ALIGN);
+	if (!slab_fences)
+		return -ENOMEM;
+
+	ret = subtests(tests, NULL);
+
+	kmem_cache_destroy(slab_fences);
+	return ret;
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (3 preceding siblings ...)
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-05 22:14     ` kbuild test robot
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 07/10] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Often we need to create a fence for a future event that has not yet been
associated with a fence. We can store a proxy fence, a placeholder, in
the timeline and replace it later when the real fence is known. Any
listeners that attach to the proxy fence will automatically be signaled
when the real fence completes, and any future listeners will instead be
attached directly to the real fence, avoiding any indirection overhead.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/dma-buf/Makefile             |  13 +-
 drivers/dma-buf/dma-fence-private.h  |  20 +
 drivers/dma-buf/dma-fence-proxy.c    | 189 +++++++++
 drivers/dma-buf/dma-fence.c          |   4 +-
 drivers/dma-buf/selftests.h          |   1 +
 drivers/dma-buf/st-dma-fence-proxy.c | 581 +++++++++++++++++++++++++++
 include/linux/dma-fence-proxy.h      |  20 +
 7 files changed, 824 insertions(+), 4 deletions(-)
 create mode 100644 drivers/dma-buf/dma-fence-private.h
 create mode 100644 drivers/dma-buf/dma-fence-proxy.c
 create mode 100644 drivers/dma-buf/st-dma-fence-proxy.c
 create mode 100644 include/linux/dma-fence-proxy.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 995e05f609ff..afaf6dadd9a3 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,6 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
-	 dma-resv.o seqno-fence.o
+obj-y := \
+	dma-buf.o \
+	dma-fence.o \
+	dma-fence-array.o \
+	dma-fence-chain.o \
+	dma-fence-proxy.o \
+	dma-resv.o \
+	seqno-fence.o
 obj-$(CONFIG_DMABUF_HEAPS)	+= dma-heap.o
 obj-$(CONFIG_DMABUF_HEAPS)	+= heaps/
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
@@ -10,6 +16,7 @@ obj-$(CONFIG_UDMABUF)		+= udmabuf.o
 dmabuf_selftests-y := \
 	selftest.o \
 	st-dma-fence.o \
-	st-dma-fence-chain.o
+	st-dma-fence-chain.o \
+	st-dma-fence-proxy.o
 
 obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
diff --git a/drivers/dma-buf/dma-fence-private.h b/drivers/dma-buf/dma-fence-private.h
new file mode 100644
index 000000000000..6924d28af0fa
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-private.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Fence mechanism for dma-buf and to allow for asynchronous dma access
+ *
+ * Copyright (C) 2012 Canonical Ltd
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Authors:
+ * Rob Clark <robdclark@gmail.com>
+ * Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ */
+
+#ifndef DMA_FENCE_PRIVATE_H
+#define DMA_FENCE_PRIVATE_H
+
+struct dma_fence;
+
+/* Defined in dma-fence.c; dma-fence-proxy.c calls it with fence->lock held */
+bool __dma_fence_enable_signaling(struct dma_fence *fence);
+
+#endif /* DMA_FENCE_PRIVATE_H */
diff --git a/drivers/dma-buf/dma-fence-proxy.c b/drivers/dma-buf/dma-fence-proxy.c
new file mode 100644
index 000000000000..6dce543d0757
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-proxy.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * dma-fence-proxy: placeholder unsignaled fence
+ *
+ * Copyright (C) 2017-2019 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-proxy.h>
+#include <linux/export.h>
+#include <linux/irq_work.h>
+#include <linux/slab.h>
+
+#include "dma-fence-private.h"
+
+/*
+ * A proxy fence.
+ *
+ * @base: the fence handed out to users of the proxy
+ * @lock: lock passed to dma_fence_init() for @base
+ * @real: fence assigned by proxy_assign(); NULL until then
+ * @cb:   callback node that proxy_assign() queues on @real's cb_list
+ * @work: irq_work queued by proxy_callback() to signal @base
+ */
+struct dma_fence_proxy {
+	struct dma_fence base;
+	spinlock_t lock;
+
+	struct dma_fence *real;
+	struct dma_fence_cb cb;
+	struct irq_work work;
+};
+
+/*
+ * .get_driver_name hook: forward to the real fence once one has been
+ * assigned, otherwise report the placeholder name "proxy".
+ */
+static const char *proxy_get_driver_name(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *proxy = container_of(fence, typeof(*proxy), base);
+	struct dma_fence *target = READ_ONCE(proxy->real);
+
+	if (!target)
+		return "proxy";
+
+	return target->ops->get_driver_name(target);
+}
+
+/*
+ * .get_timeline_name hook: forward to the real fence once one has been
+ * assigned, otherwise report the placeholder name "unset".
+ */
+static const char *proxy_get_timeline_name(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *proxy = container_of(fence, typeof(*proxy), base);
+	struct dma_fence *target = READ_ONCE(proxy->real);
+
+	if (!target)
+		return "unset";
+
+	return target->ops->get_timeline_name(target);
+}
+
+/*
+ * irq_work handler queued by proxy_callback(): signal the proxy and drop
+ * the reference proxy_assign() took when it installed the callback.
+ */
+static void proxy_irq_work(struct irq_work *work)
+{
+	struct dma_fence_proxy *p = container_of(work, typeof(*p), work);
+
+	dma_fence_signal(&p->base);
+	dma_fence_put(&p->base);
+}
+
+/*
+ * Callback installed on the real fence by proxy_assign(). Copies the
+ * real fence's error, if any, to the proxy and defers the signaling of
+ * the proxy to proxy_irq_work().
+ */
+static void proxy_callback(struct dma_fence *real, struct dma_fence_cb *cb)
+{
+	struct dma_fence_proxy *p = container_of(cb, typeof(*p), cb);
+
+	if (real->error)
+		dma_fence_set_error(&p->base, real->error);
+
+	/* Lower the height of the proxy chain -> single stack frame */
+	irq_work_queue(&p->work);
+}
+
+/*
+ * .enable_signaling hook. If a real fence has been assigned, enable
+ * signaling on it under its own lock and return that result; with no
+ * real fence yet there is nothing to enable and true is returned.
+ *
+ * The real fence's lock is taken with SINGLE_DEPTH_NESTING, presumably
+ * because the proxy's own lock is already held by the caller - confirm
+ * against the dma_fence_ops contract.
+ */
+static bool proxy_enable_signaling(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+	struct dma_fence *real = READ_ONCE(p->real);
+	bool ret = true;
+
+	if (real) {
+		spin_lock_nested(real->lock, SINGLE_DEPTH_NESTING);
+		ret = __dma_fence_enable_signaling(real);
+		spin_unlock(real->lock);
+	}
+
+	return ret;
+}
+
+/*
+ * .release hook: drop the reference held on the real fence (p->real may
+ * still be NULL if none was ever assigned) and free the proxy.
+ */
+static void proxy_release(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+
+	dma_fence_put(p->real);
+	dma_fence_free(&p->base);
+}
+
+/*
+ * Fence ops for proxies. The address of this table is also how
+ * dma_fence_replace_proxy() recognises a fence as a proxy.
+ */
+static const struct dma_fence_ops dma_fence_proxy_ops = {
+	.get_driver_name = proxy_get_driver_name,
+	.get_timeline_name = proxy_get_timeline_name,
+	.enable_signaling = proxy_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = proxy_release,
+};
+
+/**
+ * dma_fence_create_proxy - Create an unset dma-fence
+ *
+ * dma_fence_create_proxy() creates a new dma_fence stub that is initially
+ * unsignaled and may later be replaced with a real fence. Any listeners
+ * to the proxy fence will be signaled when the target fence signals its
+ * completion.
+ *
+ * The proxy is placed on a freshly allocated fence context with seqno 0.
+ *
+ * Returns: the new proxy fence, or NULL if the allocation failed.
+ */
+struct dma_fence *dma_fence_create_proxy(void)
+{
+	struct dma_fence_proxy *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	spin_lock_init(&p->lock);
+	dma_fence_init(&p->base, &dma_fence_proxy_ops, &p->lock,
+		       dma_fence_context_alloc(1), 0);
+	init_irq_work(&p->work, proxy_irq_work);
+
+	return &p->base;
+}
+EXPORT_SYMBOL(dma_fence_create_proxy);
+
+/*
+ * Signal @fence with its lock already held (the _locked variant is
+ * used), first copying over any error recorded on @real.
+ */
+static void wrap_signal_locked(struct dma_fence *fence, struct dma_fence *real)
+{
+	if (real->error)
+		dma_fence_set_error(fence, real->error);
+	dma_fence_signal_locked(fence);
+}
+
+/*
+ * Bind the proxy @fence to @real. Called from dma_fence_replace_proxy()
+ * after the proxy has been taken out of its slot.
+ *
+ * - @real == NULL: the proxy is simply signaled.
+ * - @real is already signaled, or signaling has been requested on the
+ *   proxy and enabling it on @real returns false: the proxy is signaled
+ *   immediately, inheriting @real's error.
+ * - otherwise: proxy_callback() is queued on @real together with a
+ *   reference to the proxy, so the proxy is signaled once @real is.
+ *
+ * Assigning a proxy to itself, to an already signaled proxy, or a second
+ * time triggers a WARN and is ignored.
+ *
+ * NOTE(review): the WARN_ON(p->real) check runs before the proxy lock is
+ * taken, so it presumably does not guard against two concurrent
+ * assignments - confirm that callers serialise.
+ * NOTE(review): dma_fence_is_signaled() is called with real->lock held;
+ * verify it cannot try to take that lock itself for fences that provide
+ * a .signaled hook.
+ */
+static void proxy_assign(struct dma_fence *fence, struct dma_fence *real)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+	unsigned long flags;
+
+	if (WARN_ON(fence == real))
+		return;
+
+	if (WARN_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+		return;
+
+	if (WARN_ON(p->real))
+		return;
+
+	spin_lock_irqsave(p->base.lock, flags);
+
+	if (unlikely(!real)) {
+		dma_fence_signal_locked(&p->base);
+		goto unlock;
+	}
+
+	/* Reference dropped again in proxy_release() */
+	p->real = dma_fence_get(real);
+
+	spin_lock_nested(real->lock, SINGLE_DEPTH_NESTING);
+	if (dma_fence_is_signaled(real)) {
+		wrap_signal_locked(&p->base, real);
+	} else if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			    &p->base.flags) &&
+		   !__dma_fence_enable_signaling(real)) {
+		wrap_signal_locked(&p->base, real);
+	} else {
+		/* Reference dropped again in proxy_irq_work() */
+		dma_fence_get(&p->base);
+		p->cb.func = proxy_callback;
+		list_add_tail(&p->cb.node, &real->cb_list);
+	}
+	spin_unlock(real->lock);
+
+unlock:
+	spin_unlock_irqrestore(p->base.lock, flags);
+}
+
+/**
+ * dma_fence_replace_proxy - Replace the proxy fence with the real target
+ * @slot: pointer to location of fence to update
+ * @fence: the new fence to store in @slot
+ *
+ * Once the real dma_fence is known, we can replace the proxy fence holder
+ * with a pointer to the real dma fence. Future listeners will attach to
+ * the real fence, avoiding any indirection overhead. Previous listeners
+ * will remain attached to the proxy fence, and be signaled in turn when
+ * the target fence completes.
+ *
+ * A reference to @fence (which may be NULL) is acquired on behalf of
+ * @slot. If the previous occupant of @slot was not a proxy, it is only
+ * swapped out.
+ *
+ * Returns: the fence previously stored in @slot, possibly NULL. No
+ * reference is dropped on it here; disposing of it is left to the
+ * caller.
+ */
+struct dma_fence *
+dma_fence_replace_proxy(struct dma_fence __rcu **slot, struct dma_fence *fence)
+{
+	struct dma_fence *old;
+
+	if (fence)
+		dma_fence_get(fence);
+
+	old = rcu_replace_pointer(*slot, fence, true);
+	if (old && old->ops == &dma_fence_proxy_ops)
+		proxy_assign(old, fence);
+
+	return old;
+}
+EXPORT_SYMBOL(dma_fence_replace_proxy);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 052a41e2451c..fa7bedc6703d 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -19,6 +19,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/dma_fence.h>
 
+#include "dma-fence-private.h"
+
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
@@ -273,7 +275,7 @@ void dma_fence_free(struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_fence_free);
 
-static bool __dma_fence_enable_signaling(struct dma_fence *fence)
+bool __dma_fence_enable_signaling(struct dma_fence *fence)
 {
 	bool was_set;
 
diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
index 55918ef9adab..616eca70e2d8 100644
--- a/drivers/dma-buf/selftests.h
+++ b/drivers/dma-buf/selftests.h
@@ -12,3 +12,4 @@
 selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
 selftest(dma_fence, dma_fence)
 selftest(dma_fence_chain, dma_fence_chain)
+selftest(dma_fence_proxy, dma_fence_proxy)
diff --git a/drivers/dma-buf/st-dma-fence-proxy.c b/drivers/dma-buf/st-dma-fence-proxy.c
new file mode 100644
index 000000000000..658f6b90abc4
--- /dev/null
+++ b/drivers/dma-buf/st-dma-fence-proxy.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-proxy.h>
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "selftest.h"
+
+static struct kmem_cache *slab_fences;
+
+/*
+ * A mock fence is a dma_fence bundled with the spinlock handed to
+ * dma_fence_init(). The struct is declared in the return type of this
+ * helper, which maps a pointer to the embedded base fence back to its
+ * containing mock_fence.
+ */
+static struct mock_fence {
+	struct dma_fence base;
+	spinlock_t lock;
+} *to_mock_fence(struct dma_fence *f) {
+	return container_of(f, struct mock_fence, base);
+}
+
+/* Used for both the driver name and the timeline name: always "mock". */
+static const char *mock_name(struct dma_fence *f)
+{
+	return "mock";
+}
+
+/* .release hook: hand the mock fence back to the slab_fences cache. */
+static void mock_fence_release(struct dma_fence *f)
+{
+	kmem_cache_free(slab_fences, to_mock_fence(f));
+}
+
+/* Fence ops for mock fences: only the two names and release are set. */
+static const struct dma_fence_ops mock_ops = {
+	.get_driver_name = mock_name,
+	.get_timeline_name = mock_name,
+	.release = mock_fence_release,
+};
+
+/*
+ * Allocate a mock fence from slab_fences and initialise it with its own
+ * lock, context 0 and seqno 0. It plays the "real" fence in these tests.
+ *
+ * Returns the embedded dma_fence, or NULL if the allocation fails.
+ */
+static struct dma_fence *mock_fence(void)
+{
+	struct mock_fence *f;
+
+	f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	spin_lock_init(&f->lock);
+	dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+
+	return &f->base;
+}
+
+/*
+ * Smoke test: a proxy that never gets a real fence can be created,
+ * signaled directly and released.
+ *
+ * Returns 0, or -ENOMEM if the proxy cannot be allocated.
+ */
+static int sanitycheck(void *arg)
+{
+	struct dma_fence *f;
+
+	f = dma_fence_create_proxy();
+	if (!f)
+		return -ENOMEM;
+
+	dma_fence_signal(f);
+	dma_fence_put(f);
+
+	return 0;
+}
+
+/*
+ * Test fixture: a real (mock) fence, a proxy fence, and the slot that
+ * starts out pointing at the proxy and is later updated through
+ * dma_fence_replace_proxy().
+ */
+struct fences {
+	struct dma_fence *real;
+	struct dma_fence *proxy;
+	struct dma_fence __rcu *slot;
+};
+
+/*
+ * Set up the fixture: create the proxy, park it in f->slot and create
+ * the real fence. With @attach the slot is switched to the real fence
+ * right away, which binds the proxy to it.
+ *
+ * The slot is pointed at the proxy without taking an extra reference,
+ * and the old slot value returned by dma_fence_replace_proxy() (the
+ * proxy) is deliberately ignored here for the same reason.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int create_fences(struct fences *f, bool attach)
+{
+	f->proxy = dma_fence_create_proxy();
+	if (!f->proxy)
+		return -ENOMEM;
+
+	RCU_INIT_POINTER(f->slot, f->proxy);
+
+	f->real = mock_fence();
+	if (!f->real) {
+		dma_fence_put(f->proxy);
+		return -ENOMEM;
+	}
+
+	if (attach)
+		dma_fence_replace_proxy(&f->slot, f->real);
+
+	return 0;
+}
+
+/*
+ * Release the fixture: empty the slot, then drop the references held on
+ * the real fence and on the proxy.
+ */
+static void free_fences(struct fences *f)
+{
+	struct dma_fence *old;
+
+	old = dma_fence_replace_proxy(&f->slot, NULL);
+
+	/*
+	 * create_fences() parks f->proxy in the slot without taking a
+	 * reference for it, whereas anything stored later through
+	 * dma_fence_replace_proxy() carries its own reference. If a test
+	 * bails out before replacing the proxy, the slot's value must not
+	 * be put here or the proxy would be released twice.
+	 */
+	if (old != f->proxy)
+		dma_fence_put(old);
+
+	dma_fence_put(f->real);
+	dma_fence_put(f->proxy);
+}
+
+/*
+ * A proxy bound to a real fence must be unsignaled to begin with and
+ * must report signaled once the real fence has been signaled.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_signaling(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence unexpectedly signaled on creation\n");
+		goto err_free;
+	}
+
+	if (dma_fence_signal(f.real)) {
+		pr_err("Fence reported being already signaled\n");
+		goto err_free;
+	}
+
+	if (!dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence not reporting signaled\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/*
+ * Same as wrap_signaling(), but with a second proxy ("chain") placed
+ * between the fixture's proxy and the real fence, so the signal has to
+ * travel through two levels of proxy.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_signaling_recurse(void *arg)
+{
+	struct fences f;
+	struct dma_fence *chain;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	chain = dma_fence_create_proxy();
+	if (!chain) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	/* Bind f.proxy to chain, then chain to f.real */
+	dma_fence_replace_proxy(&f.slot, chain);
+	dma_fence_put(dma_fence_replace_proxy(&f.slot, f.real));
+	dma_fence_put(chain);
+
+	/* f.real <- chain <- f.proxy */
+
+	if (dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence unexpectedly signaled on creation\n");
+		goto err_free;
+	}
+
+	if (dma_fence_signal(f.real)) {
+		pr_err("Fence reported being already signaled\n");
+		goto err_free;
+	}
+
+	if (!dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence not reporting signaled\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/* Fence callback with a flag recording whether it has been invoked. */
+struct simple_cb {
+	struct dma_fence_cb cb;
+	bool seen;
+};
+
+/* Fence callback: mark the enclosing simple_cb as seen. */
+static void simple_callback(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	smp_store_mb(container_of(cb, struct simple_cb, cb)->seen, true);
+}
+
+/*
+ * A callback added to a proxy that is already bound to a real fence must
+ * have run by the time the test checks after signaling the real fence.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_add_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/*
+ * Same as wrap_add_callback(), but with a second proxy ("chain") placed
+ * between the fixture's proxy and the real fence.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_add_callback_recurse(void *arg)
+{
+	struct simple_cb cb = {};
+	struct dma_fence *chain;
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	chain = dma_fence_create_proxy();
+	if (!chain) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	/* Bind f.proxy to chain, then chain to f.real */
+	dma_fence_replace_proxy(&f.slot, chain);
+	dma_fence_put(dma_fence_replace_proxy(&f.slot, f.real));
+	dma_fence_put(chain);
+
+	/* f.real <- chain <- f.proxy */
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/*
+ * Once the real fence behind a bound proxy has been signaled, adding a
+ * callback to the proxy must be refused, and the refused callback must
+ * not run even if the real fence is signaled a second time.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_late_add_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	dma_fence_signal(f.real);
+
+	if (!dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Added callback, but fence was already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (cb.seen) {
+		pr_err("Callback called after failed attachment!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/*
+ * A callback added to a proxy before it is bound must still run once the
+ * proxy has been bound to the real fence and that fence is signaled.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_early_add_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, f.real);
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/*
+ * Variant of wrap_early_add_callback() in which the real fence is
+ * signaled before the proxy is bound to it: the callback added to the
+ * still unbound proxy must be accepted, and must have run after the
+ * proxy is bound to the already signaled fence.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_early_add_callback_late(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	dma_fence_signal(f.real);
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, f.real);
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+/*
+ * Add a callback to an unbound proxy, bind the proxy to the real fence,
+ * signal the real fence and check that the callback ran.
+ *
+ * NOTE(review): the body is identical to wrap_early_add_callback();
+ * presumably a different ordering was intended here - confirm.
+ *
+ * Returns 0 on success, -ENOMEM on setup failure, -EINVAL otherwise.
+ */
+static int wrap_early_add_callback_early(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, f.real);
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_rm_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	if (!dma_fence_remove_callback(f.proxy, &cb.cb)) {
+		pr_err("Failed to remove callback!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (cb.seen) {
+		pr_err("Callback still signaled after removal!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_late_rm_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	if (dma_fence_remove_callback(f.proxy, &cb.cb)) {
+		pr_err("Callback removal succeed after being executed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_status(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_get_status(f.proxy)) {
+		pr_err("Fence unexpectedly has signaled status on creation\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!dma_fence_get_status(f.proxy)) {
+		pr_err("Fence not reporting signaled status\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_error(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	dma_fence_set_error(f.real, -EIO);
+
+	if (dma_fence_get_status(f.proxy)) {
+		pr_err("Fence unexpectedly has error status before signal\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (dma_fence_get_status(f.proxy) != -EIO) {
+		pr_err("Fence not reporting error status, got %d\n",
+		       dma_fence_get_status(f.proxy));
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_wait(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_wait_timeout(f.proxy, false, 0) != 0) {
+		pr_err("Wait reported complete before being signaled\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+
+	if (dma_fence_wait_timeout(f.proxy, false, 0) == 0) {
+		pr_err("Wait reported incomplete after being signaled\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	dma_fence_signal(f.real);
+	free_fences(&f);
+	return err;
+}
+
+struct wait_timer {
+	struct timer_list timer;
+	struct fences f;
+};
+
+static void wait_timer(struct timer_list *timer)
+{
+	struct wait_timer *wt = from_timer(wt, timer, timer);
+
+	dma_fence_signal(wt->f.real);
+}
+
+static int wrap_wait_timeout(void *arg)
+{
+	struct wait_timer wt;
+	int err = -EINVAL;
+
+	if (create_fences(&wt.f, true))
+		return -ENOMEM;
+
+	timer_setup_on_stack(&wt.timer, wait_timer, 0);
+
+	if (dma_fence_wait_timeout(wt.f.proxy, false, 1) != 0) {
+		pr_err("Wait reported complete before being signaled\n");
+		goto err_free;
+	}
+
+	mod_timer(&wt.timer, jiffies + 1);
+	/* Zero means the wait timed out; the timer should signal us first. */
+	if (dma_fence_wait_timeout(wt.f.proxy, false, 2) == 0) {
+		if (timer_pending(&wt.timer)) {
+			pr_notice("Timer did not fire within the jiffie!\n");
+			err = 0; /* not our fault! */
+		} else {
+			pr_err("Wait reported incomplete after timeout\n");
+		}
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	del_timer_sync(&wt.timer);
+	destroy_timer_on_stack(&wt.timer);
+	dma_fence_signal(wt.f.real);
+	free_fences(&wt.f);
+	return err;
+}
+
+int dma_fence_proxy(void)
+{
+	static const struct subtest tests[] = {
+		SUBTEST(sanitycheck),
+		SUBTEST(wrap_signaling),
+		SUBTEST(wrap_signaling_recurse),
+		SUBTEST(wrap_add_callback),
+		SUBTEST(wrap_add_callback_recurse),
+		SUBTEST(wrap_late_add_callback),
+		SUBTEST(wrap_early_add_callback),
+		SUBTEST(wrap_early_add_callback_late),
+		SUBTEST(wrap_early_add_callback_early),
+		SUBTEST(wrap_rm_callback),
+		SUBTEST(wrap_late_rm_callback),
+		SUBTEST(wrap_status),
+		SUBTEST(wrap_error),
+		SUBTEST(wrap_wait),
+		SUBTEST(wrap_wait_timeout),
+	};
+	int ret;
+
+	slab_fences = KMEM_CACHE(mock_fence,
+				 SLAB_TYPESAFE_BY_RCU |
+				 SLAB_HWCACHE_ALIGN);
+	if (!slab_fences)
+		return -ENOMEM;
+
+	ret = subtests(tests, NULL);
+
+	kmem_cache_destroy(slab_fences);
+
+	return ret;
+}
diff --git a/include/linux/dma-fence-proxy.h b/include/linux/dma-fence-proxy.h
new file mode 100644
index 000000000000..587d5044f0bf
--- /dev/null
+++ b/include/linux/dma-fence-proxy.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * dma-fence-proxy: allows waiting upon unset and future fences
+ *
+ * Copyright (C) 2017 Intel Corporation
+ */
+
+#ifndef __LINUX_DMA_FENCE_PROXY_H
+#define __LINUX_DMA_FENCE_PROXY_H
+
+#include <linux/kernel.h>
+
+struct dma_fence;
+
+struct dma_fence *dma_fence_create_proxy(void);
+
+struct dma_fence *
+dma_fence_replace_proxy(struct dma_fence __rcu **slot, struct dma_fence *fence);
+
+#endif /* __LINUX_DMA_FENCE_PROXY_H */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 07/10] drm/syncobj: Allow use of dma-fence-proxy
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (4 preceding siblings ...)
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 08/10] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Allow the callers to supply a dma-fence-proxy for asynchronous waiting on
future fences.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_syncobj.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 42d46414f767..e141db0e1eb6 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -184,6 +184,7 @@
  */
 
 #include <linux/anon_inodes.h>
+#include <linux/dma-fence-proxy.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sched/signal.h>
@@ -324,14 +325,9 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	struct dma_fence *old_fence;
 	struct syncobj_wait_entry *cur, *tmp;
 
-	if (fence)
-		dma_fence_get(fence);
-
 	spin_lock(&syncobj->lock);
 
-	old_fence = rcu_dereference_protected(syncobj->fence,
-					      lockdep_is_held(&syncobj->lock));
-	rcu_assign_pointer(syncobj->fence, fence);
+	old_fence = dma_fence_replace_proxy(&syncobj->fence, fence);
 
 	if (fence != old_fence) {
 		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 08/10] drm/i915/gem: Teach execbuf how to wait on future syncobj
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (5 preceding siblings ...)
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 07/10] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

If a syncobj has not yet been assigned, treat it as a future fence and
install and wait upon a dma-fence-proxy. The proxy will be replaced by
the real fence later, and that fence will be responsible for signaling
our waiter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9d11bad74e9a..bf1b5399ffa3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/intel-iommu.h>
+#include <linux/dma-fence-proxy.h>
 #include <linux/dma-resv.h>
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
@@ -2334,8 +2335,24 @@ await_fence_array(struct i915_execbuffer *eb,
 			continue;
 
 		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
+		if (!fence) {
+			struct dma_fence *old;
+
+			fence = dma_fence_create_proxy();
+			if (!fence)
+				return -ENOMEM;
+
+			spin_lock(&syncobj->lock);
+			old = rcu_dereference_protected(syncobj->fence, true);
+			if (unlikely(old)) {
+				dma_fence_put(fence);
+				fence = dma_fence_get(old);
+			} else {
+				rcu_assign_pointer(syncobj->fence,
+						   dma_fence_get(fence));
+			}
+			spin_unlock(&syncobj->lock);
+		}
 
 		err = i915_request_await_dma_fence(eb->request, fence);
 		dma_fence_put(fence);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (6 preceding siblings ...)
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 08/10] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
@ 2020-04-03  9:12 ` Chris Wilson
  2020-04-07 10:44   ` Tvrtko Ursulin
  2020-04-03  9:13 ` [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
                   ` (3 subsequent siblings)
  11 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:12 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++++++---
 include/uapi/drm/i915_drm.h                    |  7 ++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index bf1b5399ffa3..5c1c5a9eced4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2299,7 +2299,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n] = ptr_pack_bits(syncobj, fence.flags, 3);
 	}
 
 	return fences;
@@ -2330,7 +2330,7 @@ await_fence_array(struct i915_execbuffer *eb,
 		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n], &flags, 3);
 		if (!(flags & I915_EXEC_FENCE_WAIT))
 			continue;
 
@@ -2354,7 +2354,11 @@ await_fence_array(struct i915_execbuffer *eb,
 			spin_unlock(&syncobj->lock);
 		}
 
-		err = i915_request_await_dma_fence(eb->request, fence);
+		if (flags & I915_EXEC_FENCE_WAIT_SUBMIT)
+			err = i915_request_await_execution(eb->request, fence,
+							   eb->engine->bond_execute);
+		else
+			err = i915_request_await_dma_fence(eb->request, fence);
 		dma_fence_put(fence);
 		if (err < 0)
 			return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..704dd0e3bc1d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1040,9 +1040,10 @@ struct drm_i915_gem_exec_fence {
 	 */
 	__u32 handle;
 
-#define I915_EXEC_FENCE_WAIT            (1<<0)
-#define I915_EXEC_FENCE_SIGNAL          (1<<1)
-#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
+#define I915_EXEC_FENCE_WAIT            (1u << 0)
+#define I915_EXEC_FENCE_SIGNAL          (1u << 1)
+#define I915_EXEC_FENCE_WAIT_SUBMIT     (1u << 2)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_WAIT_SUBMIT << 1))
 	__u32 flags;
 };
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (7 preceding siblings ...)
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
@ 2020-04-03  9:13 ` Chris Wilson
  2020-04-07 10:50   ` Tvrtko Ursulin
  2020-04-03  9:32 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf Patchwork
                   ` (2 subsequent siblings)
  11 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2020-04-03  9:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke, Chris Wilson

Let userspace know if they can trust timeslicing by including it as part
of the I915_PARAM_HAS_SCHEDULER::I915_SCHEDULER_CAP_TIMESLICING

v2: Only declare timeslicing if we can safely preempt userspace.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/gt/intel_engine.h      | 3 ++-
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 +++++
 include/uapi/drm/i915_drm.h                 | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b469de0dd9b6..424672ee7874 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -339,7 +339,8 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 		return false;
 
-	return intel_engine_has_semaphores(engine);
+	return (intel_engine_has_semaphores(engine) &&
+		intel_engine_has_preemption(engine));
 }
 
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 848decee9066..b84fdd722781 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -121,6 +121,11 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 			else
 				disabled |= BIT(map[i].sched);
 		}
+
+		if (intel_engine_has_timeslices(engine))
+			enabled |= I915_SCHEDULER_CAP_TIMESLICING;
+		else
+			disabled |= I915_SCHEDULER_CAP_TIMESLICING;
 	}
 
 	i915->caps.scheduler = enabled & ~disabled;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 704dd0e3bc1d..1ee227b5131a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -523,6 +523,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
+#define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (8 preceding siblings ...)
  2020-04-03  9:13 ` [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
@ 2020-04-03  9:32 ` Patchwork
  2020-04-03  9:58 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-04-03 17:23 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  11 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2020-04-03  9:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf
URL   : https://patchwork.freedesktop.org/series/75452/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
9b51ead71e1a drm/i915/selftests: Add request throughput measurement to perf
-:96: WARNING:LINE_SPACING: Missing a blank line after declarations
#96: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1525:
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);

-:130: WARNING:LINE_SPACING: Missing a blank line after declarations
#130: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1559:
+	struct i915_request *prev = NULL;
+	IGT_TIMEOUT(end_time);

-:165: WARNING:LINE_SPACING: Missing a blank line after declarations
#165: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1594:
+	struct perf_series *ps = arg;
+	IGT_TIMEOUT(end_time);

-:188: WARNING:LINE_SPACING: Missing a blank line after declarations
#188: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1617:
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {

-:196: WARNING:LINE_SPACING: Missing a blank line after declarations
#196: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1625:
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);

-:325: WARNING:LINE_SPACING: Missing a blank line after declarations
#325: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1754:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:393: WARNING:LINE_SPACING: Missing a blank line after declarations
#393: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1822:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:462: WARNING:LINE_SPACING: Missing a blank line after declarations
#462: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1891:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:518: WARNING:LINE_SPACING: Missing a blank line after declarations
#518: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1947:
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {

-:526: WARNING:LINE_SPACING: Missing a blank line after declarations
#526: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1955:
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);

-:571: WARNING:YIELD: Using yield() is generally wrong. See yield() kernel-doc (sched/core.c)
#571: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:2000:
+		yield(); /* start all threads before we kthread_stop() */

total: 0 errors, 11 warnings, 0 checks, 611 lines checked
1401910d33b8 drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
daaaab3b61d6 dma-buf: Prettify typecasts for dma-fence-chain
42c9e1350031 dma-buf: Report signaled links inside dma-fence-chain
ae8934bfc404 dma-buf: Exercise dma-fence-chain under selftests
-:33: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#33: 
new file mode 100644

-:61: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#61: FILE: drivers/dma-buf/st-dma-fence-chain.c:24:
+	spinlock_t lock;

-:235: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#235: FILE: drivers/dma-buf/st-dma-fence-chain.c:198:
+		pr_err("Reported %d for find_seqno(0)!\n", err);

-:244: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#244: FILE: drivers/dma-buf/st-dma-fence-chain.c:207:
+			pr_err("Reported %d for find_seqno(%d:%d)!\n",

-:249: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#249: FILE: drivers/dma-buf/st-dma-fence-chain.c:212:
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",

-:272: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#272: FILE: drivers/dma-buf/st-dma-fence-chain.c:235:
+			pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",

-:286: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#286: FILE: drivers/dma-buf/st-dma-fence-chain.c:249:
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",

-:737: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'dma_fence_chain', this function's name, in a string
#737: FILE: drivers/dma-buf/st-dma-fence-chain.c:700:
+	pr_info("sizeof(dma_fence_chain)=%zu\n",

total: 0 errors, 7 warnings, 1 checks, 725 lines checked
ecffda61d51b dma-buf: Proxy fence, an unsignaled fence placeholder
-:45: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#45: 
new file mode 100644

-:93: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#93: FILE: drivers/dma-buf/dma-fence-proxy.c:18:
+	spinlock_t lock;

-:321: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#321: FILE: drivers/dma-buf/st-dma-fence-proxy.c:20:
+	spinlock_t lock;

-:481: WARNING:MEMORY_BARRIER: memory barrier without comment
#481: FILE: drivers/dma-buf/st-dma-fence-proxy.c:180:
+	smp_store_mb(container_of(cb, struct simple_cb, cb)->seen, true);

total: 0 errors, 2 warnings, 2 checks, 852 lines checked
537c5a3fd9bc drm/syncobj: Allow use of dma-fence-proxy
7889ebb21db1 drm/i915/gem: Teach execbuf how to wait on future syncobj
fa98e5774e0e drm/i915/gem: Allow combining submit-fences with syncobj
48a06c080f4b drm/i915/gt: Declare when we enabled timeslicing

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (9 preceding siblings ...)
  2020-04-03  9:32 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf Patchwork
@ 2020-04-03  9:58 ` Patchwork
  2020-04-03 17:23 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  11 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2020-04-03  9:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf
URL   : https://patchwork.freedesktop.org/series/75452/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8243 -> Patchwork_17197
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/index.html

New tests
---------

  New tests have been introduced between CI_DRM_8243 and Patchwork_17197:

### New IGT tests (2) ###

  * igt@dmabuf@all@dma_fence_chain:
    - Statuses : 43 pass(s)
    - Exec time: [7.43, 32.02] s

  * igt@dmabuf@all@dma_fence_proxy:
    - Statuses : 43 pass(s)
    - Exec time: [0.03, 0.12] s

  

Known issues
------------

  Here are the changes found in Patchwork_17197 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-cml-u2:          [PASS][1] -> [DMESG-WARN][2] ([IGT#4])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/fi-cml-u2/igt@kms_chamelium@common-hpd-after-suspend.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/fi-cml-u2/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@dp-edid-read:
    - fi-cml-u2:          [PASS][3] -> [FAIL][4] ([i915#976])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/fi-cml-u2/igt@kms_chamelium@dp-edid-read.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/fi-cml-u2/igt@kms_chamelium@dp-edid-read.html

  
#### Possible fixes ####

  * igt@i915_pm_rpm@module-reload:
    - fi-icl-dsi:         [INCOMPLETE][5] ([i915#189]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/fi-icl-dsi/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/fi-icl-dsi/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live@execlists:
    - fi-bxt-dsi:         [INCOMPLETE][7] ([i915#656]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/fi-bxt-dsi/igt@i915_selftest@live@execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/fi-bxt-dsi/igt@i915_selftest@live@execlists.html

  
  [IGT#4]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/4
  [i915#189]: https://gitlab.freedesktop.org/drm/intel/issues/189
  [i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656
  [i915#976]: https://gitlab.freedesktop.org/drm/intel/issues/976


Participating hosts (41 -> 44)
------------------------------

  Additional (9): fi-skl-6770hq fi-bwr-2160 fi-snb-2520m fi-ivb-3770 fi-cfl-8109u fi-skl-lmem fi-kbl-7560u fi-byt-n2820 fi-skl-6600u 
  Missing    (6): fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-kbl-x1275 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8243 -> Patchwork_17197

  CI-20190529: 20190529
  CI_DRM_8243: 45ccb1b8606b6ba1a5d4f8a8b4dda27bd8dbb04c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5560: 213062c7dcf0cbc8069cbb5f91acbc494def33fd @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17197: 48a06c080f4b9055f83ac771df1ec5506b72bb9c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

48a06c080f4b drm/i915/gt: Declare when we enabled timeslicing
fa98e5774e0e drm/i915/gem: Allow combining submit-fences with syncobj
7889ebb21db1 drm/i915/gem: Teach execbuf how to wait on future syncobj
537c5a3fd9bc drm/syncobj: Allow use of dma-fence-proxy
ecffda61d51b dma-buf: Proxy fence, an unsignaled fence placeholder
ae8934bfc404 dma-buf: Exercise dma-fence-chain under selftests
42c9e1350031 dma-buf: Report signaled links inside dma-fence-chain
daaaab3b61d6 dma-buf: Prettify typecasts for dma-fence-chain
1401910d33b8 drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
9b51ead71e1a drm/i915/selftests: Add request throughput measurement to perf

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf
  2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (10 preceding siblings ...)
  2020-04-03  9:58 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-04-03 17:23 ` Patchwork
  11 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2020-04-03 17:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf
URL   : https://patchwork.freedesktop.org/series/75452/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8243_full -> Patchwork_17197_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_17197_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_17197_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_17197_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_mmap_gtt@cpuset-basic-small-copy-xy:
    - shard-kbl:          [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-kbl4/igt@gem_mmap_gtt@cpuset-basic-small-copy-xy.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-kbl3/igt@gem_mmap_gtt@cpuset-basic-small-copy-xy.html

  * igt@gem_mmap_gtt@hang:
    - shard-iclb:         [PASS][3] -> [FAIL][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-iclb2/igt@gem_mmap_gtt@hang.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-iclb5/igt@gem_mmap_gtt@hang.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@sysfs_heartbeat_interval@mixed@bcs0}:
    - shard-skl:          [PASS][5] -> [INCOMPLETE][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl2/igt@sysfs_heartbeat_interval@mixed@bcs0.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl2/igt@sysfs_heartbeat_interval@mixed@bcs0.html

  
New tests
---------

  New tests have been introduced between CI_DRM_8243_full and Patchwork_17197_full:

### New IGT tests (4) ###

  * igt@dmabuf@all@dma_fence_chain:
    - Statuses : 7 pass(s)
    - Exec time: [7.44, 36.86] s

  * igt@dmabuf@all@dma_fence_proxy:
    - Statuses : 7 pass(s)
    - Exec time: [0.04, 0.09] s

  * igt@i915_selftest@perf@request:
    - Statuses : 7 pass(s)
    - Exec time: [3.50, 5.68] s

  * igt@perf_pmu@faulting-read:
    - Statuses :
    - Exec time: [None] s

  

Known issues
------------

  Here are the changes found in Patchwork_17197_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_workarounds@suspend-resume-context:
    - shard-kbl:          [PASS][7] -> [DMESG-WARN][8] ([i915#180] / [i915#93] / [i915#95])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-kbl1/igt@gem_workarounds@suspend-resume-context.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-kbl2/igt@gem_workarounds@suspend-resume-context.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-apl:          [PASS][9] -> [DMESG-WARN][10] ([i915#180]) +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-apl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-apl6/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-kbl:          [PASS][11] -> [DMESG-WARN][12] ([i915#180]) +4 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-kbl3/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-kbl1/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-glk:          [PASS][13] -> [FAIL][14] ([i915#72])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-glk6/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-glk3/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html

  * igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-untiled:
    - shard-apl:          [PASS][15] -> [FAIL][16] ([i915#52] / [i915#54] / [i915#95])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-apl4/igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-untiled.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-apl6/igt@kms_draw_crc@draw-method-xrgb8888-mmap-gtt-untiled.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][17] -> [FAIL][18] ([fdo#108145] / [i915#265]) +2 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl5/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl8/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_psr@psr2_sprite_blt:
    - shard-iclb:         [PASS][19] -> [SKIP][20] ([fdo#109441]) +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-iclb2/igt@kms_psr@psr2_sprite_blt.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-iclb5/igt@kms_psr@psr2_sprite_blt.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][21] -> [FAIL][22] ([i915#31])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-apl8/igt@kms_setmode@basic.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-apl4/igt@kms_setmode@basic.html

  * igt@prime_vgem@wait-bsd2:
    - shard-iclb:         [PASS][23] -> [SKIP][24] ([fdo#109276]) +4 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-iclb2/igt@prime_vgem@wait-bsd2.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-iclb3/igt@prime_vgem@wait-bsd2.html

  
#### Possible fixes ####

  * igt@gem_ctx_persistence@engines-mixed-process@vecs0:
    - shard-skl:          [FAIL][25] ([i915#1528]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl5/igt@gem_ctx_persistence@engines-mixed-process@vecs0.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl8/igt@gem_ctx_persistence@engines-mixed-process@vecs0.html

  * igt@gem_mmap_gtt@cpuset-basic-small-copy-odd:
    - shard-skl:          [FAIL][27] -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl5/igt@gem_mmap_gtt@cpuset-basic-small-copy-odd.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl1/igt@gem_mmap_gtt@cpuset-basic-small-copy-odd.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-kbl:          [FAIL][29] ([i915#93] / [i915#95]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-kbl3/igt@gem_tiled_swapping@non-threaded.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-kbl1/igt@gem_tiled_swapping@non-threaded.html
    - shard-tglb:         [FAIL][31] -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-tglb2/igt@gem_tiled_swapping@non-threaded.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-tglb6/igt@gem_tiled_swapping@non-threaded.html

  * igt@i915_suspend@forcewake:
    - shard-kbl:          [DMESG-WARN][33] ([i915#180]) -> [PASS][34] +2 similar issues
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-kbl4/igt@i915_suspend@forcewake.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-kbl3/igt@i915_suspend@forcewake.html

  * igt@kms_cursor_legacy@flip-vs-cursor-legacy:
    - shard-skl:          [FAIL][35] ([IGT#5]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl7/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl10/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][37] ([fdo#109349]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-iclb4/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled:
    - shard-glk:          [FAIL][39] ([i915#52] / [i915#54]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-glk9/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-glk4/igt@kms_draw_crc@draw-method-rgb565-mmap-wc-xtiled.html

  * igt@kms_hdr@bpc-switch:
    - shard-skl:          [FAIL][41] ([i915#1188]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl2/igt@kms_hdr@bpc-switch.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl8/igt@kms_hdr@bpc-switch.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-apl:          [DMESG-WARN][43] ([i915#180]) -> [PASS][44] +1 similar issue
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-apl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-apl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min:
    - shard-skl:          [FAIL][45] ([fdo#108145] / [i915#265]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl4/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl7/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [SKIP][47] ([fdo#109441]) -> [PASS][48] +2 similar issues
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-iclb4/igt@kms_psr@psr2_cursor_render.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-iclb2/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_psr@suspend:
    - shard-skl:          [INCOMPLETE][49] ([i915#198]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-skl6/igt@kms_psr@suspend.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-skl2/igt@kms_psr@suspend.html

  * igt@prime_busy@hang-bsd2:
    - shard-iclb:         [SKIP][51] ([fdo#109276]) -> [PASS][52] +1 similar issue
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8243/shard-iclb6/igt@prime_busy@hang-bsd2.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/shard-iclb4/igt@prime_busy@hang-bsd2.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [IGT#5]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/5
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1528]: https://gitlab.freedesktop.org/drm/intel/issues/1528
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#52]: https://gitlab.freedesktop.org/drm/intel/issues/52
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#72]: https://gitlab.freedesktop.org/drm/intel/issues/72
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8243 -> Patchwork_17197

  CI-20190529: 20190529
  CI_DRM_8243: 45ccb1b8606b6ba1a5d4f8a8b4dda27bd8dbb04c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5560: 213062c7dcf0cbc8069cbb5f91acbc494def33fd @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17197: 48a06c080f4b9055f83ac771df1ec5506b72bb9c @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17197/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
@ 2020-04-05 22:14     ` kbuild test robot
  0 siblings, 0 replies; 31+ messages in thread
From: kbuild test robot @ 2020-04-05 22:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: clang-built-linux, intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1676 bytes --]

Hi Chris,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next linus/master v5.6 next-20200405]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Add-request-throughput-measurement-to-perf/20200404-174829
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-d001-20200405 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project be84d2b5b7e9c98e93bf8565e3e178e43ea0ec0a)
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_writeout()+0x1: call without frame pointer save/setup
>> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_flush()+0x0: call without frame pointer save/setup
>> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_init()+0x0: call without frame pointer save/setup

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32451 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder
@ 2020-04-05 22:14     ` kbuild test robot
  0 siblings, 0 replies; 31+ messages in thread
From: kbuild test robot @ 2020-04-05 22:14 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1710 bytes --]

Hi Chris,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next linus/master v5.6 next-20200405]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Add-request-throughput-measurement-to-perf/20200404-174829
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-d001-20200405 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project be84d2b5b7e9c98e93bf8565e3e178e43ea0ec0a)
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_writeout()+0x1: call without frame pointer save/setup
>> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_flush()+0x0: call without frame pointer save/setup
>> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_init()+0x0: call without frame pointer save/setup

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 32451 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder
  2020-04-05 22:14     ` kbuild test robot
@ 2020-04-06 18:32       ` Nick Desaulniers
  -1 siblings, 0 replies; 31+ messages in thread
From: Nick Desaulniers @ 2020-04-06 18:32 UTC (permalink / raw)
  To: kbuild test robot; +Cc: clang-built-linux, intel-gfx, kbuild-all, Chris Wilson

On Sun, Apr 5, 2020 at 3:16 PM kbuild test robot <lkp@intel.com> wrote:
>
> Hi Chris,
>
> Thank you for the patch! Perhaps something to improve:
>
> [auto build test WARNING on drm-tip/drm-tip]
> [cannot apply to drm-intel/for-linux-next linus/master v5.6 next-20200405]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
> base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
>
> url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Add-request-throughput-measurement-to-perf/20200404-174829
> base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
> config: x86_64-randconfig-d001-20200405 (attached as .config)
> compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project be84d2b5b7e9c98e93bf8565e3e178e43ea0ec0a)
> reproduce:
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         COMPILER=clang make.cross ARCH=x86_64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kbuild test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
> >> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_writeout()+0x1: call without frame pointer save/setup
> >> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_flush()+0x0: call without frame pointer save/setup
> >> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_init()+0x0: call without frame pointer save/setup

Sorry for the noise, this is a known pre-existing issue not caused by
this patch.

-- 
Thanks,
~Nick Desaulniers
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder
@ 2020-04-06 18:32       ` Nick Desaulniers
  0 siblings, 0 replies; 31+ messages in thread
From: Nick Desaulniers @ 2020-04-06 18:32 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1834 bytes --]

On Sun, Apr 5, 2020 at 3:16 PM kbuild test robot <lkp@intel.com> wrote:
>
> Hi Chris,
>
> Thank you for the patch! Perhaps something to improve:
>
> [auto build test WARNING on drm-tip/drm-tip]
> [cannot apply to drm-intel/for-linux-next linus/master v5.6 next-20200405]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
> base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
>
> url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Add-request-throughput-measurement-to-perf/20200404-174829
> base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
> config: x86_64-randconfig-d001-20200405 (attached as .config)
> compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project be84d2b5b7e9c98e93bf8565e3e178e43ea0ec0a)
> reproduce:
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         COMPILER=clang make.cross ARCH=x86_64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kbuild test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
> >> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_writeout()+0x1: call without frame pointer save/setup
> >> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_flush()+0x0: call without frame pointer save/setup
> >> drivers/dma-buf/dma-fence-proxy.o: warning: objtool: __llvm_gcov_init()+0x0: call without frame pointer save/setup

Sorry for the noise, this is a known pre-existing issue not caused by
this patch.

-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
@ 2020-04-07  9:07   ` Tvrtko Ursulin
  0 siblings, 0 replies; 31+ messages in thread
From: Tvrtko Ursulin @ 2020-04-07  9:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Kenneth Graunke


On 03/04/2020 10:12, Chris Wilson wrote:
> If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
> user batch or in our own preamble, the engine raises a
> GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
> respond to a semaphore wait by yielding the timeslice, if we have
> another context to yield to!
> 
> The only real complication is that the interrupt is only generated for
> the start of the semaphore wait, and is asynchronous to our
> process_csb() -- that is, we may not have registered the timeslice before
> we see the interrupt. To ensure we don't miss a potential semaphore
> blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
> the interrupt and apply it to the next timeslice regardless of whether it
> was active at the time.
> 
> v2: We use semaphores in preempt-to-busy, within the timeslicing
> implementation itself! Ergo, when we do insert a preemption due to an
> expired timeslice, the new context may start with the missed semaphore
> flagged by the retired context and be yielded, ad infinitum. To avoid
> this, read the context id at the time of the semaphore interrupt and
> only yield if that context is still active.
> 
> Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  6 +++
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +++++
>   drivers/gpu/drm/i915/gt/intel_gt_irq.c       | 13 ++++++-
>   drivers/gpu/drm/i915/gt/intel_lrc.c          | 40 +++++++++++++++++---
>   drivers/gpu/drm/i915/gt/selftest_lrc.c       | 15 +++-----
>   drivers/gpu/drm/i915/i915_reg.h              |  1 +
>   6 files changed, 67 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 843cb6f2f696..04995040407d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1313,6 +1313,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
>   
>   	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
>   		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
> +	if (HAS_EXECLISTS(dev_priv)) {
> +		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
> +			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
> +		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
> +			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
> +	}
>   	drm_printf(m, "\tRING_START: 0x%08x\n",
>   		   ENGINE_READ(engine, RING_START));
>   	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 80cdde712842..ac283ab5d89c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -156,6 +156,15 @@ struct intel_engine_execlists {
>   	 */
>   	struct i915_priolist default_priolist;
>   
> +	/**
> +	 * @yield: CCID at the time of the last semaphore-wait interrupt.
> +	 *
> +	 * Instead of leaving a semaphore busy-spinning on an engine, we would
> +	 * like to switch to another ready context, i.e. yielding the semaphore
> +	 * timeslice.
> +	 */
> +	u32 yield;
> +
>   	/**
>   	 * @error_interrupt: CS Master EIR
>   	 *
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index f0e7fd95165a..875bd0392ffc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
>   		}
>   	}
>   
> +	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
> +		WRITE_ONCE(engine->execlists.yield,
> +			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
> +		if (del_timer(&engine->execlists.timer))
> +			tasklet = true;
> +	}
> +
>   	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
>   		tasklet = true;
>   
> @@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>   	const u32 irqs =
>   		GT_CS_MASTER_ERROR_INTERRUPT |
>   		GT_RENDER_USER_INTERRUPT |
> -		GT_CONTEXT_SWITCH_INTERRUPT;
> +		GT_CONTEXT_SWITCH_INTERRUPT |
> +		GT_WAIT_SEMAPHORE_INTERRUPT;
>   	struct intel_uncore *uncore = gt->uncore;
>   	const u32 dmask = irqs << 16 | irqs;
>   	const u32 smask = irqs << 16;
> @@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
>   	const u32 irqs =
>   		GT_CS_MASTER_ERROR_INTERRUPT |
>   		GT_RENDER_USER_INTERRUPT |
> -		GT_CONTEXT_SWITCH_INTERRUPT;
> +		GT_CONTEXT_SWITCH_INTERRUPT |
> +		GT_WAIT_SEMAPHORE_INTERRUPT;
>   	const u32 gt_interrupts[] = {
>   		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
>   		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index f028114714cd..55a58709590a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1768,7 +1768,8 @@ static void defer_active(struct intel_engine_cs *engine)
>   }
>   
>   static bool
> -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
> +need_timeslice(const struct intel_engine_cs *engine,
> +	       const struct i915_request *rq)
>   {
>   	int hint;
>   
> @@ -1782,6 +1783,31 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
>   	return hint >= effective_prio(rq);
>   }
>   
> +static bool
> +timeslice_yield(const struct intel_engine_execlists *el,
> +		const struct i915_request *rq)
> +{
> +	/*
> +	 * Once bitten, forever smitten!
> +	 *
> +	 * If the active context ever busy-waited on a semaphore,
> +	 * it will be treated as a hog until the end of its timeslice.
> +	 * The HW only sends an interrupt on the first miss, and we
> +	 * do not know if that semaphore has been signaled, or even if it
> +	 * is now stuck on another semaphore. Play safe, yield if it
> +	 * might be stuck -- it will be given a fresh timeslice in
> +	 * the near future.
> +	 */
> +	return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
> +}
> +
> +static bool
> +timeslice_expired(const struct intel_engine_execlists *el,
> +		  const struct i915_request *rq)
> +{
> +	return timer_expired(&el->timer) || timeslice_yield(el, rq);
> +}
> +
>   static int
>   switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
>   {
> @@ -1797,8 +1823,7 @@ timeslice(const struct intel_engine_cs *engine)
>   	return READ_ONCE(engine->props.timeslice_duration_ms);
>   }
>   
> -static unsigned long
> -active_timeslice(const struct intel_engine_cs *engine)
> +static unsigned long active_timeslice(const struct intel_engine_cs *engine)
>   {
>   	const struct intel_engine_execlists *execlists = &engine->execlists;
>   	const struct i915_request *rq = *execlists->active;
> @@ -1989,18 +2014,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   
>   			last = NULL;
>   		} else if (need_timeslice(engine, last) &&
> -			   timer_expired(&engine->execlists.timer)) {
> +			   timeslice_expired(execlists, last)) {
>   			if (i915_request_completed(last)) {
>   				tasklet_hi_schedule(&execlists->tasklet);
>   				return;
>   			}
>   
>   			ENGINE_TRACE(engine,
> -				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
> +				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
>   				     last->fence.context,
>   				     last->fence.seqno,
>   				     last->sched.attr.priority,
> -				     execlists->queue_priority_hint);
> +				     execlists->queue_priority_hint,
> +				     yesno(timeslice_yield(execlists, last)));
>   
>   			ring_set_paused(engine, 1);
>   			defer_active(engine);
> @@ -2261,6 +2287,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		}
>   		clear_ports(port + 1, last_port - port);
>   
> +		WRITE_ONCE(execlists->yield, -1);
>   		execlists_submit_ports(engine);
>   		set_preempt_timeout(engine, *active);
>   	} else {
> @@ -4563,6 +4590,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
>   	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
>   	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
>   	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
> +	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
>   }
>   
>   static void rcs_submission_override(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 985d4041d929..8e8b0c0ddc76 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -1071,15 +1071,12 @@ static int live_timeslice_rewind(void *arg)
>   		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
>   
>   		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
> -		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
> -		GEM_BUG_ON(!i915_request_is_active(rq[A2]));
> -		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
> -
> -		/* Wait for the timeslice to kick in */
> -		del_timer(&engine->execlists.timer);
> -		tasklet_hi_schedule(&engine->execlists.tasklet);
> -		intel_engine_flush_submission(engine);
> -
> +		if (i915_request_is_active(rq[A2])) {
> +			/* Wait for the timeslice to kick in */
> +			del_timer(&engine->execlists.timer);
> +			tasklet_hi_schedule(&engine->execlists.tasklet);
> +			intel_engine_flush_submission(engine);
> +		}
>   		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
>   		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
>   		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 17484345cb80..f402a9f78969 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
>   #define GT_BSD_USER_INTERRUPT			(1 << 12)
>   #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
> +#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
>   #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
>   #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
>   #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
@ 2020-04-07 10:44   ` Tvrtko Ursulin
  2020-04-07 10:51     ` Chris Wilson
  0 siblings, 1 reply; 31+ messages in thread
From: Tvrtko Ursulin @ 2020-04-07 10:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/04/2020 10:12, Chris Wilson wrote:
> Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf")

It looks like new uapi on the technical level, even though from a higher 
level it is just an application of existing uapi across more modes, so 
why fixes and who is the consumer?

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++++++---
>   include/uapi/drm/i915_drm.h                    |  7 ++++---
>   2 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index bf1b5399ffa3..5c1c5a9eced4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -2299,7 +2299,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>   		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>   			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>   
> -		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> +		fences[n] = ptr_pack_bits(syncobj, fence.flags, 3);
>   	}
>   
>   	return fences;
> @@ -2330,7 +2330,7 @@ await_fence_array(struct i915_execbuffer *eb,
>   		struct dma_fence *fence;
>   		unsigned int flags;
>   
> -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> +		syncobj = ptr_unpack_bits(fences[n], &flags, 3);
>   		if (!(flags & I915_EXEC_FENCE_WAIT))
>   			continue;
>   
> @@ -2354,7 +2354,11 @@ await_fence_array(struct i915_execbuffer *eb,
>   			spin_unlock(&syncobj->lock);
>   		}
>   
> -		err = i915_request_await_dma_fence(eb->request, fence);
> +		if (flags & I915_EXEC_FENCE_WAIT_SUBMIT)
> +			err = i915_request_await_execution(eb->request, fence,
> +							   eb->engine->bond_execute);
> +		else
> +			err = i915_request_await_dma_fence(eb->request, fence);
>   		dma_fence_put(fence);
>   		if (err < 0)
>   			return err;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 14b67cd6b54b..704dd0e3bc1d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1040,9 +1040,10 @@ struct drm_i915_gem_exec_fence {
>   	 */
>   	__u32 handle;
>   
> -#define I915_EXEC_FENCE_WAIT            (1<<0)
> -#define I915_EXEC_FENCE_SIGNAL          (1<<1)
> -#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
> +#define I915_EXEC_FENCE_WAIT            (1u << 0)
> +#define I915_EXEC_FENCE_SIGNAL          (1u << 1)
> +#define I915_EXEC_FENCE_WAIT_SUBMIT     (1u << 2)
> +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_WAIT_SUBMIT << 1))
>   	__u32 flags;
>   };
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing
  2020-04-03  9:13 ` [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
@ 2020-04-07 10:50   ` Tvrtko Ursulin
  2020-04-07 10:55     ` Chris Wilson
  0 siblings, 1 reply; 31+ messages in thread
From: Tvrtko Ursulin @ 2020-04-07 10:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Kenneth Graunke


On 03/04/2020 10:13, Chris Wilson wrote:
> Let userspace know if they can trust timeslicing by including it as part
> of the I915_PARAM_HAS_SCHEDULER::I915_SCHEDULER_CAP_TIMESLICING
> 
> v2: Only declare timeslicing if we can safely preempt userspace.
> 
> Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Kenneth Graunke <kenneth@whitecape.org>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h      | 3 ++-
>   drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 +++++
>   include/uapi/drm/i915_drm.h                 | 1 +
>   3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index b469de0dd9b6..424672ee7874 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -339,7 +339,8 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
>   	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
>   		return false;
>   
> -	return intel_engine_has_semaphores(engine);
> +	return (intel_engine_has_semaphores(engine) &&
> +		intel_engine_has_preemption(engine));

This is turning off timeslicing on Gen8? Well it wouldn't have worked 
anyway, outside the batch boundaries.. so it does sound technically correct.

>   }
>   
>   #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 848decee9066..b84fdd722781 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -121,6 +121,11 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
>   			else
>   				disabled |= BIT(map[i].sched);
>   		}
> +
> +		if (intel_engine_has_timeslices(engine))
> +			enabled |= I915_SCHEDULER_CAP_TIMESLICING;
> +		else
> +			disabled |= I915_SCHEDULER_CAP_TIMESLICING;
>   	}
>   
>   	i915->caps.scheduler = enabled & ~disabled;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 704dd0e3bc1d..1ee227b5131a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -523,6 +523,7 @@ typedef struct drm_i915_irq_wait {
>   #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
>   #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
>   #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
> +#define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)

Split uapi from Gen8 fix?

Regards,

Tvrtko

>   
>   #define I915_PARAM_HUC_STATUS		 42
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj
  2020-04-07 10:44   ` Tvrtko Ursulin
@ 2020-04-07 10:51     ` Chris Wilson
  2020-04-08  9:28       ` Tvrtko Ursulin
  0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2020-04-07 10:51 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-04-07 11:44:45)
> 
> On 03/04/2020 10:12, Chris Wilson wrote:
> > Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf")
> 
> It looks like new uapi on the technical level, even though from a higher 
> level it is just an application of existing uapi across more modes, so 
> why fixes and who is the consumer?

Submitting semaphores from userspace for batches under construction
[passed between processes via syncobj/sync-file]. iris has a bug where
it is trying to wait on a future fence to be submitted and cannot --
but we already have the uapi to handle that elsewhere.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing
  2020-04-07 10:50   ` Tvrtko Ursulin
@ 2020-04-07 10:55     ` Chris Wilson
  0 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-07 10:55 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Kenneth Graunke

Quoting Tvrtko Ursulin (2020-04-07 11:50:31)
> 
> On 03/04/2020 10:13, Chris Wilson wrote:
> > Let userspace know if they can trust timeslicing by including it as part
> > of the I915_PARAM_HAS_SCHEDULER::I915_SCHEDULER_CAP_TIMESLICING
> > 
> > v2: Only declare timeslicing if we can safely preempt userspace.
> > 
> > Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Kenneth Graunke <kenneth@whitecape.org>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine.h      | 3 ++-
> >   drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 +++++
> >   include/uapi/drm/i915_drm.h                 | 1 +
> >   3 files changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> > index b469de0dd9b6..424672ee7874 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > @@ -339,7 +339,8 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
> >       if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
> >               return false;
> >   
> > -     return intel_engine_has_semaphores(engine);
> > +     return (intel_engine_has_semaphores(engine) &&
> > +             intel_engine_has_preemption(engine));
> 
> This is turning off timeslicing on Gen8? Well it wouldn't have worked 
> anyway, outside the batch boundaries.. so it does sound technically correct.

...
 
> Split uapi from Gen8 fix?

I don't regard gen8 as broken per se, for the kernel could preempt
between batches -- but under the spotlight of "can userspace use this",
it clearly cannot. Hence why I put them both together, it is not until
userspace needs to control itself, that it becomes a problem.

The igts that test this purposefully do not run on gen8 because I was
aware of the limitations. That should have been a big clue, but it
wasn't until I looked at it from an actual user's perspective that I
realised how important that little detail was. :(
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj
  2020-04-07 10:51     ` Chris Wilson
@ 2020-04-08  9:28       ` Tvrtko Ursulin
  0 siblings, 0 replies; 31+ messages in thread
From: Tvrtko Ursulin @ 2020-04-08  9:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/04/2020 11:51, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-04-07 11:44:45)
>>
>> On 03/04/2020 10:12, Chris Wilson wrote:
>>> Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf")
>>
>> It looks like new uapi on the technical level, even though from a higher
>> level it is just an application of existing uapi across more modes, so
>> why fixes and who is the consumer?
> 
> Submitting semaphores from userspace for batches under construction
> [passed between processes via syncobj/sync-file]. iris has a bug where
> it is trying to wait on a future fence to be submitted and cannot --
> but we already have the uapi to handle that elsewhere.

I am all for consistent uapi and this looks simple enough to me.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
@ 2020-04-08 19:46   ` Venkata Sandeep Dhanalakota
  2020-04-08 20:00   ` Lionel Landwerlin
  1 sibling, 0 replies; 31+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-08 19:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On 20/04/03 10:12, Chris Wilson wrote:
> Whenever we walk along the dma-fence-chain, we prune signaled links to
> keep the chain nice and tidy. This leads to situations where we can
> prune a link and report the earlier fence as the target seqno --
> violating our own consistency checks that the seqno is not more advanced
> than the last element in a dma-fence-chain.
> 
> Report a NULL fence and success if the seqno has already been signaled.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/dma-buf/dma-fence-chain.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> index 3d123502ff12..c435bbba851c 100644
> --- a/drivers/dma-buf/dma-fence-chain.c
> +++ b/drivers/dma-buf/dma-fence-chain.c
> @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
>  		return -EINVAL;
>  
>  	dma_fence_chain_for_each(*pfence, &chain->base) {
> +		if ((*pfence)->seqno < seqno) { /* already signaled */
> +			dma_fence_put(*pfence);
> +			*pfence = NULL;
> +			break;
> +		}
> +
Looks good to me.

Tested-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
Reviewed-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>

>  		if ((*pfence)->context != chain->base.context ||
>  		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>  			break;
> @@ -222,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops);
>   * @chain: the chain node to initialize
>   * @prev: the previous fence
>   * @fence: the current fence
> + * @seqno: the sequence number (syncpt) of the fence within the chain
>   *
>   * Initialize a new chain node and either start a new chain or add the node to
>   * the existing chain of the previous fence.
> -- 
> 2.20.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
@ 2020-04-08 19:49   ` Venkata Sandeep Dhanalakota
  2020-04-10 16:11   ` Lionel Landwerlin
  1 sibling, 0 replies; 31+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-08 19:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On 20/04/03 10:12, Chris Wilson wrote:
> A few very simple testcases to exercise the dma-fence-chain API.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/dma-buf/Makefile             |   3 +-
>  drivers/dma-buf/selftests.h          |   1 +
>  drivers/dma-buf/st-dma-fence-chain.c | 713 +++++++++++++++++++++++++++
>  3 files changed, 716 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/dma-buf/st-dma-fence-chain.c
> 
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 9c190026bfab..995e05f609ff 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_UDMABUF)		+= udmabuf.o
>  
>  dmabuf_selftests-y := \
>  	selftest.o \
> -	st-dma-fence.o
> +	st-dma-fence.o \
> +	st-dma-fence-chain.o
>  
>  obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
> diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
> index 5320386f02e5..55918ef9adab 100644
> --- a/drivers/dma-buf/selftests.h
> +++ b/drivers/dma-buf/selftests.h
> @@ -11,3 +11,4 @@
>   */
>  selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
>  selftest(dma_fence, dma_fence)
> +selftest(dma_fence_chain, dma_fence_chain)
> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
> new file mode 100644
> index 000000000000..bd08ba67b03b
> --- /dev/null
> +++ b/drivers/dma-buf/st-dma-fence-chain.c
> @@ -0,0 +1,713 @@
> +// SPDX-License-Identifier: MIT
> +
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/dma-fence.h>
> +#include <linux/dma-fence-chain.h>
> +#include <linux/kernel.h>
> +#include <linux/kthread.h>
> +#include <linux/mm.h>
> +#include <linux/sched/signal.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/random.h>
> +
> +#include "selftest.h"
> +
> +static struct kmem_cache *slab_fences;
> +
> +static inline struct mock_fence {
> +	struct dma_fence base;
> +	spinlock_t lock;
> +} *to_mock_fence(struct dma_fence *f) {
> +	return container_of(f, struct mock_fence, base);
> +}
> +
> +static const char *mock_name(struct dma_fence *f)
> +{
> +	return "mock";
> +}
> +
> +static void mock_fence_release(struct dma_fence *f)
> +{
> +	kmem_cache_free(slab_fences, to_mock_fence(f));
> +}
> +
> +static const struct dma_fence_ops mock_ops = {
> +	.get_driver_name = mock_name,
> +	.get_timeline_name = mock_name,
> +	.release = mock_fence_release,
> +};
> +
> +static struct dma_fence *mock_fence(void)
> +{
> +	struct mock_fence *f;
> +
> +	f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
> +	if (!f)
> +		return NULL;
> +
> +	spin_lock_init(&f->lock);
> +	dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
> +
> +	return &f->base;
> +}
> +
> +static inline struct mock_chain {
> +	struct dma_fence_chain base;
> +} *to_mock_chain(struct dma_fence *f) {
> +	return container_of(f, struct mock_chain, base.base);
> +}
> +
> +static struct dma_fence *mock_chain(struct dma_fence *prev,
> +				    struct dma_fence *fence,
> +				    u64 seqno)
> +{
> +	struct mock_chain *f;
> +
> +	f = kmalloc(sizeof(*f), GFP_KERNEL);
> +	if (!f)
> +		return NULL;
> +
> +	dma_fence_chain_init(&f->base,
> +			     dma_fence_get(prev),
> +			     dma_fence_get(fence),
> +			     seqno);
> +
> +	return &f->base.base;
> +}
> +
> +static int sanitycheck(void *arg)
> +{
> +	struct dma_fence *f, *chain;
> +	int err = 0;
> +
> +	f = mock_fence();
> +	if (!f)
> +		return -ENOMEM;
> +
> +	chain = mock_chain(NULL, f, 1);
> +	if (!chain)
> +		err = -ENOMEM;
> +
> +	dma_fence_signal(f);
> +	dma_fence_put(f);
> +
> +	dma_fence_put(chain);
> +
> +	return err;
> +}
> +
> +struct fence_chains {
> +	unsigned int chain_length;
> +	struct dma_fence **fences;
> +	struct dma_fence **chains;
> +
> +	struct dma_fence *tail;
> +};
> +
> +static uint64_t seqno_inc(unsigned int i)
> +{
> +	return i + 1;
> +}
> +
> +static int fence_chains_init(struct fence_chains *fc, unsigned int count,
> +			     uint64_t (*seqno_fn)(unsigned int))
> +{
> +	unsigned int i;
> +	int err = 0;
> +
> +	fc->chains = kvmalloc_array(count, sizeof(*fc->chains),
> +				    GFP_KERNEL | __GFP_ZERO);
> +	if (!fc->chains)
> +		return -ENOMEM;
> +
> +	fc->fences = kvmalloc_array(count, sizeof(*fc->fences),
> +				    GFP_KERNEL | __GFP_ZERO);
> +	if (!fc->fences) {
> +		err = -ENOMEM;
> +		goto err_chains;
> +	}
> +
> +	fc->tail = NULL;
> +	for (i = 0; i < count; i++) {
> +		fc->fences[i] = mock_fence();
> +		if (!fc->fences[i]) {
> +			err = -ENOMEM;
> +			goto unwind;
> +		}
> +
> +		fc->chains[i] = mock_chain(fc->tail,
> +					   fc->fences[i],
> +					   seqno_fn(i));
> +		if (!fc->chains[i]) {
> +			err = -ENOMEM;
> +			goto unwind;
> +		}
> +
> +		fc->tail = fc->chains[i];
> +	}
> +
> +	fc->chain_length = i;
> +	return 0;
> +
> +unwind:
> +	for (i = 0; i < count; i++) {
> +		dma_fence_put(fc->fences[i]);
> +		dma_fence_put(fc->chains[i]);
> +	}
> +	kvfree(fc->fences);
> +err_chains:
> +	kvfree(fc->chains);
> +	return err;
> +}
> +
> +static void fence_chains_fini(struct fence_chains *fc)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < fc->chain_length; i++) {
> +		dma_fence_signal(fc->fences[i]);
> +		dma_fence_put(fc->fences[i]);
> +	}
> +	kvfree(fc->fences);
> +
> +	for (i = 0; i < fc->chain_length; i++)
> +		dma_fence_put(fc->chains[i]);
> +	kvfree(fc->chains);
> +}
> +
> +static int find_seqno(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	fence = dma_fence_get(fc.tail);
> +	err = dma_fence_chain_find_seqno(&fence, 0);
> +	dma_fence_put(fence);
> +	if (err) {
> +		pr_err("Reported %d for find_seqno(0)!\n", err);
> +		goto err;
> +	}
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		fence = dma_fence_get(fc.tail);
> +		err = dma_fence_chain_find_seqno(&fence, i + 1);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Reported %d for find_seqno(%d:%d)!\n",
> +			       err, fc.chain_length + 1, i + 1);
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
> +			       fc.chain_length + 1, i + 1);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, i + 1);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Error reported for finding self\n");
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence reported by find self\n");
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, i + 2);
> +		dma_fence_put(fence);
> +		if (!err) {
> +			pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",
> +			       i + 1, i + 2);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, i);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Error reported for previous fence!\n");
> +			goto err;
> +		}
> +		if (i > 0 && fence != fc.chains[i - 1]) {
> +			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
> +			       i + 1, i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int find_signaled(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +
> +	err = fence_chains_init(&fc, 2, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	dma_fence_signal(fc.fences[0]);
> +
> +	fence = dma_fence_get(fc.tail);
> +	err = dma_fence_chain_find_seqno(&fence, 1);
> +	dma_fence_put(fence);
> +	if (err) {
> +		pr_err("Reported %d for find_seqno()!\n", err);
> +		goto err;
> +	}
> +
> +	if (fence && fence != fc.chains[0]) {
> +		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:1\n",
> +		       fence->seqno);
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, 1);
> +		dma_fence_put(fence);
> +		if (err)
> +			pr_err("Reported %d for finding self!\n", err);
> +
> +		err = -EINVAL;
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int find_out_of_order(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +
> +	err = fence_chains_init(&fc, 3, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	dma_fence_signal(fc.fences[1]);
> +
> +	fence = dma_fence_get(fc.tail);
> +	err = dma_fence_chain_find_seqno(&fence, 2);
> +	dma_fence_put(fence);
> +	if (err) {
> +		pr_err("Reported %d for find_seqno()!\n", err);
> +		goto err;
> +	}
> +
> +	if (fence && fence != fc.chains[1]) {
> +		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:2\n",
> +		       fence->seqno);
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, 2);
> +		dma_fence_put(fence);
> +		if (err)
> +			pr_err("Reported %d for finding self!\n", err);
> +
> +		err = -EINVAL;
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static uint64_t seqno_inc2(unsigned int i)
> +{
> +	return 2 * i + 2;
> +}
> +
> +static int find_gap(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc2);
> +	if (err)
> +		return err;
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		fence = dma_fence_get(fc.tail);
> +		err = dma_fence_chain_find_seqno(&fence, 2 * i + 1);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Reported %d for find_seqno(%d:%d)!\n",
> +			       err, fc.chain_length + 1, 2 * i + 1);
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence.seqno:%lld reported by find_seqno(%d:%d)\n",
> +			       fence->seqno,
> +			       fc.chain_length + 1,
> +			       2 * i + 1);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, 2 * i + 2);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Error reported for finding self\n");
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence reported by find self\n");
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +struct find_race {
> +	struct fence_chains fc;
> +	atomic_t children;
> +};
> +
> +static int __find_race(void *arg)
> +{
> +	struct find_race *data = arg;
> +	int err = 0;
> +
> +	while (!kthread_should_stop()) {
> +		struct dma_fence *fence = dma_fence_get(data->fc.tail);
> +		int seqno;
> +
> +		seqno = prandom_u32_max(data->fc.chain_length) + 1;
> +
> +		err = dma_fence_chain_find_seqno(&fence, seqno);
> +		if (err) {
> +			pr_err("Failed to find fence seqno:%d\n",
> +			       seqno);
> +			dma_fence_put(fence);
> +			break;
> +		}
> +		if (!fence)
> +			goto signal;
> +
> +		err = dma_fence_chain_find_seqno(&fence, seqno);
> +		if (err) {
> +			pr_err("Reported an invalid fence for find-self:%d\n",
> +			       seqno);
> +			dma_fence_put(fence);
> +			break;
> +		}
> +
> +		if (fence->seqno < seqno) {
> +			pr_err("Reported an earlier fence.seqno:%lld for seqno:%d\n",
> +			       fence->seqno, seqno);
> +			err = -EINVAL;
> +			dma_fence_put(fence);
> +			break;
> +		}
> +
> +		dma_fence_put(fence);
> +
> +signal:
> +		seqno = prandom_u32_max(data->fc.chain_length - 1);
> +		dma_fence_signal(data->fc.fences[seqno]);
> +		cond_resched();
> +	}
> +
> +	if (atomic_dec_and_test(&data->children))
> +		wake_up_var(&data->children);
> +	return err;
> +}
> +
> +static int find_race(void *arg)
> +{
> +	struct find_race data;
> +	int ncpus = num_online_cpus();
> +	struct task_struct **threads;
> +	unsigned long count;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&data.fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
> +	if (!threads) {
> +		err = -ENOMEM;
> +		goto err;
> +	}
> +
> +	atomic_set(&data.children, 0);
> +	for (i = 0; i < ncpus; i++) {
> +		threads[i] = kthread_run(__find_race, &data, "dmabuf/%d", i);
> +		if (IS_ERR(threads[i])) {
> +			ncpus = i;
> +			break;
> +		}
> +		atomic_inc(&data.children);
> +		get_task_struct(threads[i]);
> +	}
> +
> +	wait_var_event_timeout(&data.children,
> +			       !atomic_read(&data.children),
> +			       5 * HZ);
> +
> +	for (i = 0; i < ncpus; i++) {
> +		int ret;
> +
> +		ret = kthread_stop(threads[i]);
> +		if (ret && !err)
> +			err = ret;
> +		put_task_struct(threads[i]);
> +	}
> +	kfree(threads);
> +
> +	count = 0;
> +	for (i = 0; i < data.fc.chain_length; i++)
> +		if (dma_fence_is_signaled(data.fc.fences[i]))
> +			count++;
> +	pr_info("Completed %lu cycles\n", count);
> +
> +err:
> +	fence_chains_fini(&data.fc);
> +	return err;
> +}
> +
> +static int signal_forward(void *arg)
> +{
> +	struct fence_chains fc;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		dma_fence_signal(fc.fences[i]);
> +
> +		if (!dma_fence_is_signaled(fc.chains[i])) {
> +			pr_err("chain[%d] not signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (i + 1 < fc.chain_length &&
> +		    dma_fence_is_signaled(fc.chains[i + 1])) {
> +			pr_err("chain[%d] is signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int signal_backward(void *arg)
> +{
> +	struct fence_chains fc;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	for (i = fc.chain_length; i--; ) {
> +		dma_fence_signal(fc.fences[i]);
> +
> +		if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
> +			pr_err("chain[%d] is signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		if (!dma_fence_is_signaled(fc.chains[i])) {
> +			pr_err("chain[%d] was not signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int __wait_fence_chains(void *arg)
> +{
> +	struct fence_chains *fc = arg;
> +
> +	if (dma_fence_wait(fc->tail, false))
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +static int wait_forward(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct task_struct *tsk;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
> +		goto err;
> +	}
> +	get_task_struct(tsk);
> +	yield_to(tsk, true);
> +
> +	for (i = 0; i < fc.chain_length; i++)
> +		dma_fence_signal(fc.fences[i]);
> +
> +	err = kthread_stop(tsk);
> +	put_task_struct(tsk);
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int wait_backward(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct task_struct *tsk;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
> +		goto err;
> +	}
> +	get_task_struct(tsk);
> +	yield_to(tsk, true);
> +
> +	for (i = fc.chain_length; i--; )
> +		dma_fence_signal(fc.fences[i]);
> +
> +	err = kthread_stop(tsk);
> +	put_task_struct(tsk);
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static void randomise_fences(struct fence_chains *fc)
> +{
> +	unsigned int count = fc->chain_length;
> +
> +	/* Fisher-Yates shuffle courtesy of Knuth */
> +	while (--count) {
> +		unsigned int swp;
> +
> +		swp = prandom_u32_max(count + 1);
> +		if (swp == count)
> +			continue;
> +
> +		swap(fc->fences[count], fc->fences[swp]);
> +	}
> +}
> +
> +static int wait_random(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct task_struct *tsk;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	randomise_fences(&fc);
> +
> +	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
> +		goto err;
> +	}
> +	get_task_struct(tsk);
> +	yield_to(tsk, true);
> +
> +	for (i = 0; i < fc.chain_length; i++)
> +		dma_fence_signal(fc.fences[i]);
> +
> +	err = kthread_stop(tsk);
> +	put_task_struct(tsk);
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +int dma_fence_chain(void)
> +{
> +	static const struct subtest tests[] = {
> +		SUBTEST(sanitycheck),
> +		SUBTEST(find_seqno),
> +		SUBTEST(find_signaled),
> +		SUBTEST(find_out_of_order),
> +		SUBTEST(find_gap),
> +		SUBTEST(find_race),
> +		SUBTEST(signal_forward),
> +		SUBTEST(signal_backward),
> +		SUBTEST(wait_forward),
> +		SUBTEST(wait_backward),
> +		SUBTEST(wait_random),
> +	};
> +	int ret;
> +
> +	pr_info("sizeof(dma_fence_chain)=%zu\n",
> +		sizeof(struct dma_fence_chain));
> +
> +	slab_fences = KMEM_CACHE(mock_fence,
> +				 SLAB_TYPESAFE_BY_RCU |
> +				 SLAB_HWCACHE_ALIGN);
> +	if (!slab_fences)
> +		return -ENOMEM;
> +
> +	ret = subtests(tests, NULL);
> +
> +	kmem_cache_destroy(slab_fences);
> +	return ret;
> +}
I think it covers all of the API required for fence-chain.
Reviewed-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
> -- 
> 2.20.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
  2020-04-08 19:46   ` Venkata Sandeep Dhanalakota
@ 2020-04-08 20:00   ` Lionel Landwerlin
  2020-04-09 10:52     ` Chris Wilson
  1 sibling, 1 reply; 31+ messages in thread
From: Lionel Landwerlin @ 2020-04-08 20:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 03/04/2020 12:12, Chris Wilson wrote:
> Whenever we walk along the dma-fence-chain, we prune signaled links to
> keep the chain nice and tidy. This leads to situations where we can
> prune a link and report the earlier fence as the target seqno --
> violating our own consistency checks that the seqno is not more advanced
> than the last element in a dma-fence-chain.
>
> Report a NULL fence and success if the seqno has already been signaled.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/dma-buf/dma-fence-chain.c | 7 +++++++
>   1 file changed, 7 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> index 3d123502ff12..c435bbba851c 100644
> --- a/drivers/dma-buf/dma-fence-chain.c
> +++ b/drivers/dma-buf/dma-fence-chain.c
> @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
>   		return -EINVAL;
>   
>   	dma_fence_chain_for_each(*pfence, &chain->base) {
> +		if ((*pfence)->seqno < seqno) { /* already signaled */
> +			dma_fence_put(*pfence);
> +			*pfence = NULL;
> +			break;
> +		}
> +


Wouldn't this condition have been fulfilled in the previous check? :


chain = to_dma_fence_chain(*pfence);
if (!chain || chain->base.seqno < seqno)
         return -EINVAL;

-Lionel

>   		if ((*pfence)->context != chain->base.context ||
>   		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>   			break;
> @@ -222,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops);
>    * @chain: the chain node to initialize
>    * @prev: the previous fence
>    * @fence: the current fence
> + * @seqno: the sequence number (syncpt) of the fence within the chain
>    *
>    * Initialize a new chain node and either start a new chain or add the node to
>    * the existing chain of the previous fence.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
  2020-04-08 20:00   ` Lionel Landwerlin
@ 2020-04-09 10:52     ` Chris Wilson
  2020-04-09 11:16       ` Lionel Landwerlin
  0 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2020-04-09 10:52 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2020-04-08 21:00:59)
> On 03/04/2020 12:12, Chris Wilson wrote:
> > Whenever we walk along the dma-fence-chain, we prune signaled links to
> > keep the chain nice and tidy. This leads to situations where we can
> > prune a link and report the earlier fence as the target seqno --
> > violating our own consistency checks that the seqno is not more advanced
> > than the last element in a dma-fence-chain.
> >
> > Report a NULL fence and success if the seqno has already been signaled.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/dma-buf/dma-fence-chain.c | 7 +++++++
> >   1 file changed, 7 insertions(+)
> >
> > diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> > index 3d123502ff12..c435bbba851c 100644
> > --- a/drivers/dma-buf/dma-fence-chain.c
> > +++ b/drivers/dma-buf/dma-fence-chain.c
> > @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
> >               return -EINVAL;
> >   
> >       dma_fence_chain_for_each(*pfence, &chain->base) {
> > +             if ((*pfence)->seqno < seqno) { /* already signaled */
> > +                     dma_fence_put(*pfence);
> > +                     *pfence = NULL;
> > +                     break;
> > +             }
> > +
> 
> 
> Wouldn't this condition been fulfilled in the previous check? :
> 
> 
> chain = to_dma_fence_chain(*pfence);
> if (!chain || chain->base.seqno < seqno)
>          return -EINVAL;

The problem is in the chain iteration. It assumes that an unordered set
of fences is in the order of the user's seqno. There are no restrictions
placed on the chain, so we must apply the ordering from the timeline seqno
directly.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
  2020-04-09 10:52     ` Chris Wilson
@ 2020-04-09 11:16       ` Lionel Landwerlin
  2020-04-09 13:46         ` Chris Wilson
  0 siblings, 1 reply; 31+ messages in thread
From: Lionel Landwerlin @ 2020-04-09 11:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 09/04/2020 13:52, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2020-04-08 21:00:59)
>> On 03/04/2020 12:12, Chris Wilson wrote:
>>> Whenever we walk along the dma-fence-chain, we prune signaled links to
>>> keep the chain nice and tidy. This leads to situations where we can
>>> prune a link and report the earlier fence as the target seqno --
>>> violating our own consistency checks that the seqno is not more advanced
>>> than the last element in a dma-fence-chain.
>>>
>>> Report a NULL fence and success if the seqno has already been signaled.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/dma-buf/dma-fence-chain.c | 7 +++++++
>>>    1 file changed, 7 insertions(+)
>>>
>>> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
>>> index 3d123502ff12..c435bbba851c 100644
>>> --- a/drivers/dma-buf/dma-fence-chain.c
>>> +++ b/drivers/dma-buf/dma-fence-chain.c
>>> @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
>>>                return -EINVAL;
>>>    
>>>        dma_fence_chain_for_each(*pfence, &chain->base) {
>>> +             if ((*pfence)->seqno < seqno) { /* already signaled */
>>> +                     dma_fence_put(*pfence);
>>> +                     *pfence = NULL;
>>> +                     break;
>>> +             }
>>> +
>>
>> Wouldn't this condition been fulfilled in the previous check? :
>>
>>
>> chain = to_dma_fence_chain(*pfence);
>> if (!chain || chain->base.seqno < seqno)
>>           return -EINVAL;
> The problem is in the chain iteration. It assumes that an unordered set
> of fences is in the order of the user's seqno. There are no restrictions
> placed on the chain, so we must apply the ordering from the timeline seqno
> directly.
> -Chris


I don't really understand that. chain->seqno should be ordered because 
chain->prev_seqno <= chain->seqno.

Do you have an example where this is not the case?


-Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
  2020-04-09 11:16       ` Lionel Landwerlin
@ 2020-04-09 13:46         ` Chris Wilson
  0 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2020-04-09 13:46 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2020-04-09 12:16:48)
> On 09/04/2020 13:52, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2020-04-08 21:00:59)
> >> On 03/04/2020 12:12, Chris Wilson wrote:
> >>> Whenever we walk along the dma-fence-chain, we prune signaled links to
> >>> keep the chain nice and tidy. This leads to situations where we can
> >>> prune a link and report the earlier fence as the target seqno --
> >>> violating our own consistency checks that the seqno is not more advanced
> >>> than the last element in a dma-fence-chain.
> >>>
> >>> Report a NULL fence and success if the seqno has already been signaled.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    drivers/dma-buf/dma-fence-chain.c | 7 +++++++
> >>>    1 file changed, 7 insertions(+)
> >>>
> >>> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> >>> index 3d123502ff12..c435bbba851c 100644
> >>> --- a/drivers/dma-buf/dma-fence-chain.c
> >>> +++ b/drivers/dma-buf/dma-fence-chain.c
> >>> @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
> >>>                return -EINVAL;
> >>>    
> >>>        dma_fence_chain_for_each(*pfence, &chain->base) {
> >>> +             if ((*pfence)->seqno < seqno) { /* already signaled */
> >>> +                     dma_fence_put(*pfence);
> >>> +                     *pfence = NULL;
> >>> +                     break;
> >>> +             }
> >>> +
> >>
> >> Wouldn't this condition been fulfilled in the previous check? :
> >>
> >>
> >> chain = to_dma_fence_chain(*pfence);
> >> if (!chain || chain->base.seqno < seqno)
> >>           return -EINVAL;
> > The problem is in the chain iteration. It assumes that an unordered set
> > of fences is in the order of the user's seqno. There are no restrictions
> > placed on the chain, so we must apply the ordering from the timeline seqno
> > directly.
> > -Chris
> 
> 
> I don't really understand that. chain->seqno should be ordered because 
> chain->prev_seqno <= chain->seqno.
> 
> Do you have an example where this is not the case?

See the failing test case.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests
  2020-04-03  9:12 ` [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
  2020-04-08 19:49   ` Venkata Sandeep Dhanalakota
@ 2020-04-10 16:11   ` Lionel Landwerlin
  1 sibling, 0 replies; 31+ messages in thread
From: Lionel Landwerlin @ 2020-04-10 16:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 03/04/2020 12:12, Chris Wilson wrote:
> A few very simple testcases to exercise the dma-fence-chain API.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/dma-buf/Makefile             |   3 +-
>   drivers/dma-buf/selftests.h          |   1 +
>   drivers/dma-buf/st-dma-fence-chain.c | 713 +++++++++++++++++++++++++++
>   3 files changed, 716 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/dma-buf/st-dma-fence-chain.c
>
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 9c190026bfab..995e05f609ff 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_UDMABUF)		+= udmabuf.o
>   
>   dmabuf_selftests-y := \
>   	selftest.o \
> -	st-dma-fence.o
> +	st-dma-fence.o \
> +	st-dma-fence-chain.o
>   
>   obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
> diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
> index 5320386f02e5..55918ef9adab 100644
> --- a/drivers/dma-buf/selftests.h
> +++ b/drivers/dma-buf/selftests.h
> @@ -11,3 +11,4 @@
>    */
>   selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
>   selftest(dma_fence, dma_fence)
> +selftest(dma_fence_chain, dma_fence_chain)
> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
> new file mode 100644
> index 000000000000..bd08ba67b03b
> --- /dev/null
> +++ b/drivers/dma-buf/st-dma-fence-chain.c
> @@ -0,0 +1,713 @@
> +// SPDX-License-Identifier: MIT
> +
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/dma-fence.h>
> +#include <linux/dma-fence-chain.h>
> +#include <linux/kernel.h>
> +#include <linux/kthread.h>
> +#include <linux/mm.h>
> +#include <linux/sched/signal.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/random.h>
> +
> +#include "selftest.h"
> +
> +static struct kmem_cache *slab_fences;
> +
> +static inline struct mock_fence {
> +	struct dma_fence base;
> +	spinlock_t lock;
> +} *to_mock_fence(struct dma_fence *f) {
> +	return container_of(f, struct mock_fence, base);
> +}
> +
> +static const char *mock_name(struct dma_fence *f)
> +{
> +	return "mock";
> +}
> +
> +static void mock_fence_release(struct dma_fence *f)
> +{
> +	kmem_cache_free(slab_fences, to_mock_fence(f));
> +}
> +
> +static const struct dma_fence_ops mock_ops = {
> +	.get_driver_name = mock_name,
> +	.get_timeline_name = mock_name,
> +	.release = mock_fence_release,
> +};
> +
> +static struct dma_fence *mock_fence(void)
> +{
> +	struct mock_fence *f;
> +
> +	f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
> +	if (!f)
> +		return NULL;
> +
> +	spin_lock_init(&f->lock);
> +	dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
> +
> +	return &f->base;
> +}
> +
> +static inline struct mock_chain {
> +	struct dma_fence_chain base;
> +} *to_mock_chain(struct dma_fence *f) {
> +	return container_of(f, struct mock_chain, base.base);
> +}
> +
> +static struct dma_fence *mock_chain(struct dma_fence *prev,
> +				    struct dma_fence *fence,
> +				    u64 seqno)
> +{
> +	struct mock_chain *f;
> +
> +	f = kmalloc(sizeof(*f), GFP_KERNEL);
> +	if (!f)
> +		return NULL;
> +
> +	dma_fence_chain_init(&f->base,
> +			     dma_fence_get(prev),
> +			     dma_fence_get(fence),
> +			     seqno);
> +
> +	return &f->base.base;
> +}
> +
> +static int sanitycheck(void *arg)
> +{
> +	struct dma_fence *f, *chain;
> +	int err = 0;
> +
> +	f = mock_fence();
> +	if (!f)
> +		return -ENOMEM;
> +
> +	chain = mock_chain(NULL, f, 1);
> +	if (!chain)
> +		err = -ENOMEM;
> +
> +	dma_fence_signal(f);
> +	dma_fence_put(f);
> +
> +	dma_fence_put(chain);
> +
> +	return err;
> +}
> +
> +struct fence_chains {
> +	unsigned int chain_length;
> +	struct dma_fence **fences;
> +	struct dma_fence **chains;
> +
> +	struct dma_fence *tail;
> +};
> +
> +static uint64_t seqno_inc(unsigned int i)
> +{
> +	return i + 1;
> +}
> +
> +static int fence_chains_init(struct fence_chains *fc, unsigned int count,
> +			     uint64_t (*seqno_fn)(unsigned int))
> +{
> +	unsigned int i;
> +	int err = 0;
> +
> +	fc->chains = kvmalloc_array(count, sizeof(*fc->chains),
> +				    GFP_KERNEL | __GFP_ZERO);
> +	if (!fc->chains)
> +		return -ENOMEM;
> +
> +	fc->fences = kvmalloc_array(count, sizeof(*fc->fences),
> +				    GFP_KERNEL | __GFP_ZERO);
> +	if (!fc->fences) {
> +		err = -ENOMEM;
> +		goto err_chains;
> +	}
> +
> +	fc->tail = NULL;
> +	for (i = 0; i < count; i++) {
> +		fc->fences[i] = mock_fence();
> +		if (!fc->fences[i]) {
> +			err = -ENOMEM;
> +			goto unwind;
> +		}
> +
> +		fc->chains[i] = mock_chain(fc->tail,
> +					   fc->fences[i],
> +					   seqno_fn(i));
> +		if (!fc->chains[i]) {
> +			err = -ENOMEM;
> +			goto unwind;
> +		}
> +
> +		fc->tail = fc->chains[i];
> +	}
> +
> +	fc->chain_length = i;
> +	return 0;
> +
> +unwind:
> +	for (i = 0; i < count; i++) {
> +		dma_fence_put(fc->fences[i]);
> +		dma_fence_put(fc->chains[i]);
> +	}
> +	kvfree(fc->fences);
> +err_chains:
> +	kvfree(fc->chains);
> +	return err;
> +}
> +
> +static void fence_chains_fini(struct fence_chains *fc)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < fc->chain_length; i++) {
> +		dma_fence_signal(fc->fences[i]);
> +		dma_fence_put(fc->fences[i]);
> +	}
> +	kvfree(fc->fences);
> +
> +	for (i = 0; i < fc->chain_length; i++)
> +		dma_fence_put(fc->chains[i]);
> +	kvfree(fc->chains);
> +}
> +
> +static int find_seqno(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	fence = dma_fence_get(fc.tail);
> +	err = dma_fence_chain_find_seqno(&fence, 0);
> +	dma_fence_put(fence);
> +	if (err) {
> +		pr_err("Reported %d for find_seqno(0)!\n", err);
> +		goto err;
> +	}
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		fence = dma_fence_get(fc.tail);
> +		err = dma_fence_chain_find_seqno(&fence, i + 1);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Reported %d for find_seqno(%d:%d)!\n",
> +			       err, fc.chain_length + 1, i + 1);
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
> +			       fc.chain_length + 1, i + 1);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, i + 1);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Error reported for finding self\n");
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence reported by find self\n");
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, i + 2);
> +		dma_fence_put(fence);
> +		if (!err) {
> +			pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",
> +			       i + 1, i + 2);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, i);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Error reported for previous fence!\n");
> +			goto err;
> +		}
> +		if (i > 0 && fence != fc.chains[i - 1]) {
> +			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
> +			       i + 1, i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int find_signaled(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +
> +	err = fence_chains_init(&fc, 2, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	dma_fence_signal(fc.fences[0]);
> +
> +	fence = dma_fence_get(fc.tail);
> +	err = dma_fence_chain_find_seqno(&fence, 1);
> +	dma_fence_put(fence);
> +	if (err) {
> +		pr_err("Reported %d for find_seqno()!\n", err);
> +		goto err;
> +	}
> +
> +	if (fence && fence != fc.chains[0]) {
> +		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:1\n",
> +		       fence->seqno);
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, 1);
> +		dma_fence_put(fence);
> +		if (err)
> +			pr_err("Reported %d for finding self!\n", err);
> +
> +		err = -EINVAL;
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int find_out_of_order(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +
> +	err = fence_chains_init(&fc, 3, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	dma_fence_signal(fc.fences[1]);
> +
> +	fence = dma_fence_get(fc.tail);
> +	err = dma_fence_chain_find_seqno(&fence, 2);
> +	dma_fence_put(fence);
> +	if (err) {
> +		pr_err("Reported %d for find_seqno()!\n", err);
> +		goto err;
> +	}
> +


I don't think this test is right.


You have 3 dma-fence-chains :

1 2 & 3


You signaled the underlying fence of chains[1] (seqno 2) above, but 
chains[0] (seqno 1) is still not signaled.

The principle behind a fence chain is that a given seqno is signaled 
whenever every seqno prior to it is signaled.

Therefore when you look for seqno 2, you should be given anything that 
hasn't completed before 2.

In this case that would be chains[0] which is seqno 1.


-Lionel


> +	if (fence && fence != fc.chains[1]) {
> +		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:2\n",
> +		       fence->seqno);
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, 2);
> +		dma_fence_put(fence);
> +		if (err)
> +			pr_err("Reported %d for finding self!\n", err);
> +
> +		err = -EINVAL;
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static uint64_t seqno_inc2(unsigned int i)
> +{
> +	return 2 * i + 2;
> +}
> +
> +static int find_gap(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct dma_fence *fence;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc2);
> +	if (err)
> +		return err;
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		fence = dma_fence_get(fc.tail);
> +		err = dma_fence_chain_find_seqno(&fence, 2 * i + 1);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Reported %d for find_seqno(%d:%d)!\n",
> +			       err, fc.chain_length + 1, 2 * i + 1);
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence.seqno:%lld reported by find_seqno(%d:%d)\n",
> +			       fence->seqno,
> +			       fc.chain_length + 1,
> +			       2 * i + 1);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		dma_fence_get(fence);
> +		err = dma_fence_chain_find_seqno(&fence, 2 * i + 2);
> +		dma_fence_put(fence);
> +		if (err) {
> +			pr_err("Error reported for finding self\n");
> +			goto err;
> +		}
> +		if (fence != fc.chains[i]) {
> +			pr_err("Incorrect fence reported by find self\n");
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +struct find_race {
> +	struct fence_chains fc;
> +	atomic_t children;
> +};
> +
> +static int __find_race(void *arg)
> +{
> +	struct find_race *data = arg;
> +	int err = 0;
> +
> +	while (!kthread_should_stop()) {
> +		struct dma_fence *fence = dma_fence_get(data->fc.tail);
> +		int seqno;
> +
> +		seqno = prandom_u32_max(data->fc.chain_length) + 1;
> +
> +		err = dma_fence_chain_find_seqno(&fence, seqno);
> +		if (err) {
> +			pr_err("Failed to find fence seqno:%d\n",
> +			       seqno);
> +			dma_fence_put(fence);
> +			break;
> +		}
> +		if (!fence)
> +			goto signal;
> +
> +		err = dma_fence_chain_find_seqno(&fence, seqno);
> +		if (err) {
> +			pr_err("Reported an invalid fence for find-self:%d\n",
> +			       seqno);
> +			dma_fence_put(fence);
> +			break;
> +		}
> +
> +		if (fence->seqno < seqno) {
> +			pr_err("Reported an earlier fence.seqno:%lld for seqno:%d\n",
> +			       fence->seqno, seqno);
> +			err = -EINVAL;
> +			dma_fence_put(fence);
> +			break;
> +		}
> +
> +		dma_fence_put(fence);
> +
> +signal:
> +		seqno = prandom_u32_max(data->fc.chain_length - 1);
> +		dma_fence_signal(data->fc.fences[seqno]);
> +		cond_resched();
> +	}
> +
> +	if (atomic_dec_and_test(&data->children))
> +		wake_up_var(&data->children);
> +	return err;
> +}
> +
> +static int find_race(void *arg)
> +{
> +	struct find_race data;
> +	int ncpus = num_online_cpus();
> +	struct task_struct **threads;
> +	unsigned long count;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&data.fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
> +	if (!threads) {
> +		err = -ENOMEM;
> +		goto err;
> +	}
> +
> +	atomic_set(&data.children, 0);
> +	for (i = 0; i < ncpus; i++) {
> +		threads[i] = kthread_run(__find_race, &data, "dmabuf/%d", i);
> +		if (IS_ERR(threads[i])) {
> +			ncpus = i;
> +			break;
> +		}
> +		atomic_inc(&data.children);
> +		get_task_struct(threads[i]);
> +	}
> +
> +	wait_var_event_timeout(&data.children,
> +			       !atomic_read(&data.children),
> +			       5 * HZ);
> +
> +	for (i = 0; i < ncpus; i++) {
> +		int ret;
> +
> +		ret = kthread_stop(threads[i]);
> +		if (ret && !err)
> +			err = ret;
> +		put_task_struct(threads[i]);
> +	}
> +	kfree(threads);
> +
> +	count = 0;
> +	for (i = 0; i < data.fc.chain_length; i++)
> +		if (dma_fence_is_signaled(data.fc.fences[i]))
> +			count++;
> +	pr_info("Completed %lu cycles\n", count);
> +
> +err:
> +	fence_chains_fini(&data.fc);
> +	return err;
> +}
> +
> +static int signal_forward(void *arg)
> +{
> +	struct fence_chains fc;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		dma_fence_signal(fc.fences[i]);
> +
> +		if (!dma_fence_is_signaled(fc.chains[i])) {
> +			pr_err("chain[%d] not signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (i + 1 < fc.chain_length &&
> +		    dma_fence_is_signaled(fc.chains[i + 1])) {
> +			pr_err("chain[%d] is signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int signal_backward(void *arg)
> +{
> +	struct fence_chains fc;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	for (i = fc.chain_length; i--; ) {
> +		dma_fence_signal(fc.fences[i]);
> +
> +		if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
> +			pr_err("chain[%d] is signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +	for (i = 0; i < fc.chain_length; i++) {
> +		if (!dma_fence_is_signaled(fc.chains[i])) {
> +			pr_err("chain[%d] was not signaled!\n", i);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +	}
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int __wait_fence_chains(void *arg)
> +{
> +	struct fence_chains *fc = arg;
> +
> +	if (dma_fence_wait(fc->tail, false))
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +static int wait_forward(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct task_struct *tsk;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
> +		goto err;
> +	}
> +	get_task_struct(tsk);
> +	yield_to(tsk, true);
> +
> +	for (i = 0; i < fc.chain_length; i++)
> +		dma_fence_signal(fc.fences[i]);
> +
> +	err = kthread_stop(tsk);
> +	put_task_struct(tsk);
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static int wait_backward(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct task_struct *tsk;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
> +		goto err;
> +	}
> +	get_task_struct(tsk);
> +	yield_to(tsk, true);
> +
> +	for (i = fc.chain_length; i--; )
> +		dma_fence_signal(fc.fences[i]);
> +
> +	err = kthread_stop(tsk);
> +	put_task_struct(tsk);
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +static void randomise_fences(struct fence_chains *fc)
> +{
> +	unsigned int count = fc->chain_length;
> +
> +	/* Fisher-Yates shuffle courtesy of Knuth */
> +	while (--count) {
> +		unsigned int swp;
> +
> +		swp = prandom_u32_max(count + 1);
> +		if (swp == count)
> +			continue;
> +
> +		swap(fc->fences[count], fc->fences[swp]);
> +	}
> +}
> +
> +static int wait_random(void *arg)
> +{
> +	struct fence_chains fc;
> +	struct task_struct *tsk;
> +	int err;
> +	int i;
> +
> +	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
> +	if (err)
> +		return err;
> +
> +	randomise_fences(&fc);
> +
> +	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
> +		goto err;
> +	}
> +	get_task_struct(tsk);
> +	yield_to(tsk, true);
> +
> +	for (i = 0; i < fc.chain_length; i++)
> +		dma_fence_signal(fc.fences[i]);
> +
> +	err = kthread_stop(tsk);
> +	put_task_struct(tsk);
> +
> +err:
> +	fence_chains_fini(&fc);
> +	return err;
> +}
> +
> +int dma_fence_chain(void)
> +{
> +	static const struct subtest tests[] = {
> +		SUBTEST(sanitycheck),
> +		SUBTEST(find_seqno),
> +		SUBTEST(find_signaled),
> +		SUBTEST(find_out_of_order),
> +		SUBTEST(find_gap),
> +		SUBTEST(find_race),
> +		SUBTEST(signal_forward),
> +		SUBTEST(signal_backward),
> +		SUBTEST(wait_forward),
> +		SUBTEST(wait_backward),
> +		SUBTEST(wait_random),
> +	};
> +	int ret;
> +
> +	pr_info("sizeof(dma_fence_chain)=%zu\n",
> +		sizeof(struct dma_fence_chain));
> +
> +	slab_fences = KMEM_CACHE(mock_fence,
> +				 SLAB_TYPESAFE_BY_RCU |
> +				 SLAB_HWCACHE_ALIGN);
> +	if (!slab_fences)
> +		return -ENOMEM;
> +
> +	ret = subtests(tests, NULL);
> +
> +	kmem_cache_destroy(slab_fences);
> +	return ret;
> +}


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf
@ 2020-03-31 21:25 Chris Wilson
  0 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2020-03-31 21:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.

v2: Also measure throughput using only one thread.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 590 +++++++++++++++++-
 2 files changed, 590 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1dab0360f76a..3cf0599cec4b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1239,7 +1240,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%ps", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1476,3 +1477,590 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+/*
+ * Queue a kernel request on @ce's engine, ordered after the last request
+ * tracked on @ce's timeline (if there is one), and wait for it.
+ *
+ * @err is the caller's error so far. It is passed through untouched when
+ * already set; otherwise it becomes -ETIME if the wait fails. When no error
+ * is pending, submission is flushed until the engine reports idle.
+ *
+ * Returns the resulting error, or the error from creating the kernel
+ * request if that fails.
+ */
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct dma_fence *last;
+	struct i915_request *rq;
+	long timeout;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* Order the kernel request after whatever was last on the timeline */
+	last = i915_active_fence_get(&ce->timeline->last_request);
+	if (last) {
+		i915_request_await_dma_fence(rq, last);
+		dma_fence_put(last);
+	}
+
+	/* Hold our own reference across the add so that we can wait on it */
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+
+	timeout = i915_request_wait(rq, 0, HZ / 2);
+	if (timeout < 0 && !err)
+		err = -ETIME;
+
+	i915_request_put(rq);
+
+	if (err)
+		return err;
+
+	while (!intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return 0;
+}
+
+/* Per-engine measurements gathered around one throughput run. */
+struct perf_stats {
+	/* Engine these statistics belong to */
+	struct intel_engine_cs *engine;
+	/* Number of requests counted by the p_*() thread functions */
+	unsigned long count;
+	/* Start timestamp while running, elapsed wall time afterwards */
+	ktime_t time;
+	/* Engine busy time over the run; left at 0 if stats were unavailable */
+	ktime_t busy;
+	/* Context runtime as reported by intel_context_get_total_runtime_ns() */
+	u64 runtime;
+};
+
+/* Argument handed to the s_*() functions: the set of contexts to cycle over. */
+struct perf_series {
+	struct drm_i915_private *i915;
+	/* Number of valid entries in ce[] */
+	unsigned int nengines;
+	/* One context per engine, filled in by perf_series_engines() */
+	struct intel_context *ce[];
+};
+
+/*
+ * Round-robin over the contexts in the series, submitting one request at a
+ * time and waiting for it to complete before moving on to the next context,
+ * until the selftest timeout expires.
+ *
+ * Returns 0, the error from request creation, or -ETIME if a wait fails.
+ */
+static int s_sync0(void *arg)
+{
+	struct perf_series *series = arg;
+	unsigned int n = 0;
+	IGT_TIMEOUT(end_time);
+
+	GEM_BUG_ON(!series->nengines);
+	do {
+		struct i915_request *rq;
+		long timeout;
+
+		rq = i915_request_create(series->ce[n]);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		timeout = i915_request_wait(rq, 0, HZ / 5);
+		i915_request_put(rq);
+		if (timeout < 0)
+			return -ETIME;
+
+		/* Advance to the next context, wrapping at the end */
+		n++;
+		if (n >= series->nengines)
+			n = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	return 0;
+}
+
+/*
+ * Round-robin over the contexts in the series keeping one request in
+ * flight: each new request is submitted first, and only then do we wait
+ * upon the request submitted on the previous iteration.
+ *
+ * Returns 0, the error from request creation, or -ETIME if a wait fails.
+ */
+static int s_sync1(void *arg)
+{
+	struct perf_series *series = arg;
+	struct i915_request *pending = NULL;
+	unsigned int n = 0;
+	int err = 0;
+	IGT_TIMEOUT(end_time);
+
+	GEM_BUG_ON(!series->nengines);
+	do {
+		struct i915_request *rq = i915_request_create(series->ce[n]);
+
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		/* Nothing to wait upon on the very first pass */
+		if (pending) {
+			if (i915_request_wait(pending, 0, HZ / 5) < 0)
+				err = -ETIME;
+		}
+
+		/* The new request takes over as the one left in flight */
+		i915_request_put(pending);
+		pending = rq;
+		if (err)
+			break;
+
+		n++;
+		if (n >= series->nengines)
+			n = 0;
+	} while (!__igt_timeout(end_time, NULL));
+
+	/* Drop the reference to the final request, which was not waited on */
+	i915_request_put(pending);
+
+	return err;
+}
+
+/*
+ * Round-robin over the contexts in the series, submitting requests as fast
+ * as possible without waiting on any of them, until the selftest timeout
+ * expires.
+ *
+ * Returns 0, or the error from request creation.
+ */
+static int s_many(void *arg)
+{
+	struct perf_series *series = arg;
+	unsigned int n = 0;
+	IGT_TIMEOUT(end_time);
+
+	GEM_BUG_ON(!series->nengines);
+	for (;;) {
+		struct i915_request *rq = i915_request_create(series->ce[n]);
+
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		i915_request_add(rq);
+
+		n++;
+		if (n >= series->nengines)
+			n = 0;
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Measure request throughput when a single thread feeds every engine.
+ *
+ * One context is created and pinned per uabi engine. Each of s_sync0,
+ * s_sync1 and s_many is then run in turn over that set of contexts. Around
+ * each run we sample, per engine, the wall time, the engine busy time (when
+ * engine stats can be enabled) and the context runtime, and report them
+ * with pr_info().
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int perf_series_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		s_sync0,
+		s_sync1,
+		s_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct perf_stats *stats;
+	struct perf_series *ps;
+	unsigned int idx;
+	int err = 0;
+
+	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
+	if (!ps) {
+		kfree(stats);
+		return -ENOMEM;
+	}
+
+	/*
+	 * The latency request is best effort: carry on without it if the
+	 * allocation fails. It is only set up after the allocations that
+	 * return early, so that every later exit passes through the cleanup
+	 * at the end and the request is never left registered.
+	 */
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	ps->i915 = i915;
+	ps->nengines = nengines;
+
+	idx = 0;
+	for_each_uabi_engine(engine, i915) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			/* Report the failure rather than returning success */
+			err = PTR_ERR(ce);
+			goto out;
+		}
+
+		err = intel_context_pin(ce);
+		if (err) {
+			intel_context_put(ce);
+			goto out;
+		}
+
+		ps->ce[idx++] = ce;
+	}
+	GEM_BUG_ON(idx != ps->nengines);
+
+	for (fn = func; *fn && !err; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		/* Take the starting samples for every engine */
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p =
+				memset(&stats[idx], 0, sizeof(stats[idx]));
+			struct intel_context *ce = ps->ce[idx];
+
+			p->engine = ps->ce[idx]->engine;
+			intel_engine_pm_get(p->engine);
+
+			/*
+			 * The starting busy time is stored offset by one, so
+			 * that a non-zero p->busy marks the engines on which
+			 * stats were enabled.
+			 */
+			if (intel_engine_supports_stats(p->engine) &&
+			    !intel_enable_engine_stats(p->engine))
+				p->busy = intel_engine_get_busy_time(p->engine) + 1;
+			/* Negated so that adding the final value yields the delta */
+			p->runtime = -intel_context_get_total_runtime_ns(ce);
+			p->time = ktime_get();
+		}
+
+		err = (*fn)(ps);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+
+		/* Take the final samples and report, even after an error */
+		for (idx = 0; idx < nengines; idx++) {
+			struct perf_stats *p = &stats[idx];
+			struct intel_context *ce = ps->ce[idx];
+			int integer, decimal;
+			u64 busy, dt;
+
+			p->time = ktime_sub(ktime_get(), p->time);
+			if (p->busy) {
+				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
+						    p->busy - 1);
+				intel_disable_engine_stats(p->engine);
+			}
+
+			err = switch_to_kernel_sync(ce, err);
+			p->runtime += intel_context_get_total_runtime_ns(ce);
+			intel_engine_pm_put(p->engine);
+
+			/* Busy percentage, split into integer and 2 decimals */
+			busy = 100 * ktime_to_ns(p->busy);
+			dt = ktime_to_ns(p->time);
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, p->engine->name, ce->timeline->seqno,
+				integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+		}
+	}
+
+out:
+	/* ps->ce[] is zeroed on allocation; stop at the first unused slot */
+	for (idx = 0; idx < nengines; idx++) {
+		if (IS_ERR_OR_NULL(ps->ce[idx]))
+			break;
+
+		intel_context_unpin(ps->ce[idx]);
+		intel_context_put(ps->ce[idx]);
+	}
+	kfree(ps);
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(stats);
+	return err;
+}
+
+/*
+ * Thread body: on a private context for the engine named in the perf_stats,
+ * submit one request at a time and wait for each to complete before
+ * submitting the next, until the selftest timeout expires.
+ *
+ * Once the context has been set up, the perf_stats is filled in with the
+ * number of completed requests, the elapsed wall time, the engine busy time
+ * (only if engine stats could be enabled) and the context's total runtime.
+ *
+ * Returns 0 or a negative error code.
+ */
+static int p_sync0(void *arg)
+{
+	struct perf_stats *stats = arg;
+	struct intel_engine_cs *engine = stats->engine;
+	unsigned long completed = 0;
+	struct intel_context *ce;
+	bool sampling = false;
+	IGT_TIMEOUT(end_time);
+	ktime_t start;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err)
+		goto out_put;
+
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		stats->busy = intel_engine_get_busy_time(engine);
+		sampling = true;
+	}
+
+	start = ktime_get();
+	do {
+		struct i915_request *rq = i915_request_create(ce);
+
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = i915_request_wait(rq, 0, HZ / 5) < 0 ? -ETIME : 0;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		completed++;
+	} while (!__igt_timeout(end_time, NULL));
+	stats->time = ktime_sub(ktime_get(), start);
+
+	if (sampling) {
+		stats->busy = ktime_sub(intel_engine_get_busy_time(engine),
+					stats->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	stats->runtime = intel_context_get_total_runtime_ns(ce);
+	stats->count = completed;
+
+	intel_context_unpin(ce);
+out_put:
+	intel_context_put(ce);
+	return err;
+}
+
+/*
+ * Thread body: on a private context for the engine named in the perf_stats,
+ * keep one request in flight. Each new request is submitted first, and only
+ * then do we wait upon the request from the previous iteration.
+ *
+ * Once the context has been set up, the perf_stats is filled in with the
+ * number of loop iterations completed, the elapsed wall time, the engine
+ * busy time (only if engine stats could be enabled) and the context's total
+ * runtime.
+ *
+ * Returns 0 or a negative error code.
+ */
+static int p_sync1(void *arg)
+{
+	struct perf_stats *stats = arg;
+	struct intel_engine_cs *engine = stats->engine;
+	struct i915_request *pending = NULL;
+	unsigned long completed = 0;
+	struct intel_context *ce;
+	bool sampling = false;
+	IGT_TIMEOUT(end_time);
+	ktime_t start;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err)
+		goto out_put;
+
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		stats->busy = intel_engine_get_busy_time(engine);
+		sampling = true;
+	}
+
+	start = ktime_get();
+	do {
+		struct i915_request *rq = i915_request_create(ce);
+
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		/* Nothing to wait upon on the very first pass */
+		err = 0;
+		if (pending) {
+			if (i915_request_wait(pending, 0, HZ / 5) < 0)
+				err = -ETIME;
+		}
+
+		/* The new request takes over as the one left in flight */
+		i915_request_put(pending);
+		pending = rq;
+		if (err)
+			break;
+
+		completed++;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_request_put(pending);
+	stats->time = ktime_sub(ktime_get(), start);
+
+	if (sampling) {
+		stats->busy = ktime_sub(intel_engine_get_busy_time(engine),
+					stats->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	stats->runtime = intel_context_get_total_runtime_ns(ce);
+	stats->count = completed;
+
+	intel_context_unpin(ce);
+out_put:
+	intel_context_put(ce);
+	return err;
+}
+
+/*
+ * Thread body: on a private context for the engine named in the perf_stats,
+ * submit requests as fast as possible without waiting on any of them, until
+ * the selftest timeout expires.
+ *
+ * Once the context has been set up, the perf_stats is filled in with the
+ * number of requests submitted, the elapsed wall time, the engine busy time
+ * (only if engine stats could be enabled) and the context's total runtime.
+ *
+ * Returns 0 or a negative error code.
+ */
+static int p_many(void *arg)
+{
+	struct perf_stats *stats = arg;
+	struct intel_engine_cs *engine = stats->engine;
+	unsigned long submitted = 0;
+	struct intel_context *ce;
+	bool sampling = false;
+	IGT_TIMEOUT(end_time);
+	ktime_t start;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err)
+		goto out_put;
+
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		stats->busy = intel_engine_get_busy_time(engine);
+		sampling = true;
+	}
+
+	start = ktime_get();
+	do {
+		struct i915_request *rq = i915_request_create(ce);
+
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		submitted++;
+	} while (!__igt_timeout(end_time, NULL));
+	stats->time = ktime_sub(ktime_get(), start);
+
+	if (sampling) {
+		stats->busy = ktime_sub(intel_engine_get_busy_time(engine),
+					stats->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	stats->runtime = intel_context_get_total_runtime_ns(ce);
+	stats->count = submitted;
+
+	intel_context_unpin(ce);
+out_put:
+	intel_context_put(ce);
+	return err;
+}
+
+/*
+ * Measure request throughput when every engine is fed by its own thread.
+ *
+ * For each of p_sync0, p_sync1 and p_many in turn, one kthread is started
+ * per uabi engine running that function against the engine's perf_stats.
+ * All threads are then stopped and, if nothing failed, the per-engine
+ * count, busy percentage, runtime and wall time are reported with pr_info().
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		p_sync0,
+		p_sync1,
+		p_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct {
+		struct perf_stats p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	/* Best effort: the test still runs if this allocation fails */
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		/*
+		 * NOTE(review): none of the p_*() thread functions visible
+		 * here read selftest.counter - confirm this is still needed.
+		 */
+		atomic_set(&i915->selftest.counter, nengines);
+
+		/* Start one thread per engine, holding a wakeref for each */
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				/*
+				 * The ERR_PTR is left in tsk so that the stop
+				 * loop below knows where to end.
+				 */
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		/* Stop the threads in order, keeping the first error seen */
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		/* Only report results from a run in which nothing failed */
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_stats *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			/* Busy percentage, split into integer and 2 decimals */
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(engines);
+	return err;
+}
+
+/*
+ * Entry point for the request throughput perf selftests. The subtests are
+ * skipped, reporting success, when the GT is already wedged.
+ */
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_series_engines),
+		SUBTEST(perf_parallel_engines),
+	};
+
+	return intel_gt_is_wedged(&i915->gt) ? 0 : i915_subtests(tests, i915);
+}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2020-04-10 16:11 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-03  9:12 [Intel-gfx] [PATCH 01/10] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 02/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-04-07  9:07   ` Tvrtko Ursulin
2020-04-03  9:12 ` [Intel-gfx] [PATCH 03/10] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
2020-04-08 19:46   ` Venkata Sandeep Dhanalakota
2020-04-08 20:00   ` Lionel Landwerlin
2020-04-09 10:52     ` Chris Wilson
2020-04-09 11:16       ` Lionel Landwerlin
2020-04-09 13:46         ` Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
2020-04-08 19:49   ` Venkata Sandeep Dhanalakota
2020-04-10 16:11   ` Lionel Landwerlin
2020-04-03  9:12 ` [Intel-gfx] [PATCH 06/10] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-04-05 22:14   ` kbuild test robot
2020-04-05 22:14     ` kbuild test robot
2020-04-06 18:32     ` Nick Desaulniers
2020-04-06 18:32       ` Nick Desaulniers
2020-04-03  9:12 ` [Intel-gfx] [PATCH 07/10] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 08/10] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
2020-04-03  9:12 ` [Intel-gfx] [PATCH 09/10] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
2020-04-07 10:44   ` Tvrtko Ursulin
2020-04-07 10:51     ` Chris Wilson
2020-04-08  9:28       ` Tvrtko Ursulin
2020-04-03  9:13 ` [Intel-gfx] [PATCH 10/10] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
2020-04-07 10:50   ` Tvrtko Ursulin
2020-04-07 10:55     ` Chris Wilson
2020-04-03  9:32 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/10] drm/i915/selftests: Add request throughput measurement to perf Patchwork
2020-04-03  9:58 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-03 17:23 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2020-03-31 21:25 [Intel-gfx] [PATCH 01/10] " Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.