* [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx

Use the same engines_idle_release() routine for cleaning up all old
ctx->engines[] state, closing any potential races with concurrent execbuf
submission.

v2ish: Use the ce->pin_count to close the execbuf gap.
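
For readers unfamiliar with the i915_sw_fence bookkeeping this relies on,
here is a minimal userspace model (C11 atomics, illustrative names, not
the driver code) of the await/complete/commit counting that lets the old
engines be released only once every tracked request has retired:

/*
 * Completion-counting fence in miniature: the fence only "signals" (and
 * fires its notify callback) after commit drops the initial bias and
 * every successful await has been balanced by a complete.
 */
#include <stdatomic.h>
#include <stdio.h>

struct sw_fence {
	atomic_int pending;			/* starts at 1: the commit bias */
	void (*notify)(struct sw_fence *f);
};

static void sw_fence_init(struct sw_fence *f, void (*notify)(struct sw_fence *f))
{
	atomic_init(&f->pending, 1);
	f->notify = notify;
}

static int sw_fence_await(struct sw_fence *f)
{
	int old = atomic_load(&f->pending);

	do {				/* fails once the fence has signalled */
		if (old == 0)
			return 0;
	} while (!atomic_compare_exchange_weak(&f->pending, &old, old + 1));

	return 1;
}

static void sw_fence_complete(struct sw_fence *f)
{
	if (atomic_fetch_sub(&f->pending, 1) == 1)
		f->notify(f);		/* last outstanding wait: now idle */
}

static void engines_idle(struct sw_fence *f)
{
	puts("all requests retired, safe to free the engines");
}

int main(void)
{
	struct sw_fence fence;

	sw_fence_init(&fence, engines_idle);
	sw_fence_await(&fence);		/* one outstanding request tracked */
	sw_fence_complete(&fence);	/* "commit": drop the initial bias */
	sw_fence_complete(&fence);	/* request retires: notify fires */
	return 0;
}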

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1241
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 193 +++++++++---------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |   1 -
 .../gpu/drm/i915/gem/selftests/mock_context.c |   3 +
 3 files changed, 105 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e525ead073f7..cb6b6be48978 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -242,7 +242,6 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count)
 		if (!e->engines[count])
 			continue;
 
-		RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
 		intel_context_put(e->engines[count]);
 	}
 	kfree(e);
@@ -255,7 +254,11 @@ static void free_engines(struct i915_gem_engines *e)
 
 static void free_engines_rcu(struct rcu_head *rcu)
 {
-	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
+	struct i915_gem_engines *engines =
+		container_of(rcu, struct i915_gem_engines, rcu);
+
+	i915_sw_fence_fini(&engines->fence);
+	free_engines(engines);
 }
 
 static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
@@ -269,8 +272,6 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 	if (!e)
 		return ERR_PTR(-ENOMEM);
 
-	e->ctx = ctx;
-
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce;
 
@@ -304,7 +305,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	list_del(&ctx->link);
 	spin_unlock(&ctx->i915->gem.contexts.lock);
 
-	free_engines(rcu_access_pointer(ctx->engines));
 	mutex_destroy(&ctx->engines_mutex);
 
 	if (ctx->timeline)
@@ -491,30 +491,104 @@ static void kill_engines(struct i915_gem_engines *engines)
 static void kill_stale_engines(struct i915_gem_context *ctx)
 {
 	struct i915_gem_engines *pos, *next;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ctx->stale.lock, flags);
+	spin_lock_irq(&ctx->stale.lock);
+	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 	list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
-		if (!i915_sw_fence_await(&pos->fence))
+		if (!i915_sw_fence_await(&pos->fence)) {
+			list_del_init(&pos->link);
 			continue;
+		}
 
-		spin_unlock_irqrestore(&ctx->stale.lock, flags);
+		spin_unlock_irq(&ctx->stale.lock);
 
 		kill_engines(pos);
 
-		spin_lock_irqsave(&ctx->stale.lock, flags);
+		spin_lock_irq(&ctx->stale.lock);
+		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
 		list_safe_reset_next(pos, next, link);
 		list_del_init(&pos->link); /* decouple from FENCE_COMPLETE */
 
 		i915_sw_fence_complete(&pos->fence);
 	}
-	spin_unlock_irqrestore(&ctx->stale.lock, flags);
+	spin_unlock_irq(&ctx->stale.lock);
 }
 
 static void kill_context(struct i915_gem_context *ctx)
 {
 	kill_stale_engines(ctx);
-	kill_engines(__context_engines_static(ctx));
+}
+
+static int engines_notify(struct i915_sw_fence *fence,
+			  enum i915_sw_fence_notify state)
+{
+	struct i915_gem_engines *engines =
+		container_of(fence, typeof(*engines), fence);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		if (!list_empty(&engines->link)) {
+			struct i915_gem_context *ctx = engines->ctx;
+			unsigned long flags;
+
+			spin_lock_irqsave(&ctx->stale.lock, flags);
+			list_del(&engines->link);
+			spin_unlock_irqrestore(&ctx->stale.lock, flags);
+		}
+		i915_gem_context_put(engines->ctx);
+		break;
+
+	case FENCE_FREE:
+		init_rcu_head(&engines->rcu);
+		call_rcu(&engines->rcu, free_engines_rcu);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void engines_idle_release(struct i915_gem_context *ctx,
+				 struct i915_gem_engines *engines)
+{
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+
+	i915_sw_fence_init(&engines->fence, engines_notify);
+	INIT_LIST_HEAD(&engines->link);
+
+	engines->ctx = i915_gem_context_get(ctx);
+
+	for_each_gem_engine(ce, engines, it) {
+		struct dma_fence *fence;
+		int err = 0;
+
+		/* serialises with execbuf */
+		RCU_INIT_POINTER(ce->gem_context, NULL);
+		if (!intel_context_pin_if_active(ce))
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (fence) {
+			err = i915_sw_fence_await_dma_fence(&engines->fence,
+							    fence, 0,
+							    GFP_KERNEL);
+			dma_fence_put(fence);
+		}
+		intel_context_unpin(ce);
+		if (err < 0)
+			goto kill;
+	}
+
+	spin_lock_irq(&ctx->stale.lock);
+	if (!i915_gem_context_is_closed(ctx))
+		list_add_tail(&engines->link, &ctx->stale.engines);
+	spin_unlock_irq(&ctx->stale.lock);
+
+kill:
+	if (list_empty(&engines->link)) /* raced, already closed */
+		kill_engines(engines);
+
+	i915_sw_fence_commit(&engines->fence);
 }
 
 static void set_closed_name(struct i915_gem_context *ctx)
@@ -538,11 +612,16 @@ static void context_close(struct i915_gem_context *ctx)
 {
 	struct i915_address_space *vm;
 
+	/* Flush any concurrent set_engines() */
+	mutex_lock(&ctx->engines_mutex);
+	engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
 	i915_gem_context_set_closed(ctx);
-	set_closed_name(ctx);
+	mutex_unlock(&ctx->engines_mutex);
 
 	mutex_lock(&ctx->mutex);
 
+	set_closed_name(ctx);
+
 	vm = i915_gem_context_vm(ctx);
 	if (vm)
 		i915_vm_close(vm);
@@ -1626,77 +1705,6 @@ static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
-static int engines_notify(struct i915_sw_fence *fence,
-			  enum i915_sw_fence_notify state)
-{
-	struct i915_gem_engines *engines =
-		container_of(fence, typeof(*engines), fence);
-
-	switch (state) {
-	case FENCE_COMPLETE:
-		if (!list_empty(&engines->link)) {
-			struct i915_gem_context *ctx = engines->ctx;
-			unsigned long flags;
-
-			spin_lock_irqsave(&ctx->stale.lock, flags);
-			list_del(&engines->link);
-			spin_unlock_irqrestore(&ctx->stale.lock, flags);
-		}
-		break;
-
-	case FENCE_FREE:
-		init_rcu_head(&engines->rcu);
-		call_rcu(&engines->rcu, free_engines_rcu);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static void engines_idle_release(struct i915_gem_engines *engines)
-{
-	struct i915_gem_engines_iter it;
-	struct intel_context *ce;
-	unsigned long flags;
-
-	GEM_BUG_ON(!engines);
-	i915_sw_fence_init(&engines->fence, engines_notify);
-
-	INIT_LIST_HEAD(&engines->link);
-	spin_lock_irqsave(&engines->ctx->stale.lock, flags);
-	if (!i915_gem_context_is_closed(engines->ctx))
-		list_add(&engines->link, &engines->ctx->stale.engines);
-	spin_unlock_irqrestore(&engines->ctx->stale.lock, flags);
-	if (list_empty(&engines->link)) /* raced, already closed */
-		goto kill;
-
-	for_each_gem_engine(ce, engines, it) {
-		struct dma_fence *fence;
-		int err;
-
-		if (!ce->timeline)
-			continue;
-
-		fence = i915_active_fence_get(&ce->timeline->last_request);
-		if (!fence)
-			continue;
-
-		err = i915_sw_fence_await_dma_fence(&engines->fence,
-						    fence, 0,
-						    GFP_KERNEL);
-
-		dma_fence_put(fence);
-		if (err < 0)
-			goto kill;
-	}
-	goto out;
-
-kill:
-	kill_engines(engines);
-out:
-	i915_sw_fence_commit(&engines->fence);
-}
-
 static int
 set_engines(struct i915_gem_context *ctx,
 	    const struct drm_i915_gem_context_param *args)
@@ -1739,8 +1747,6 @@ set_engines(struct i915_gem_context *ctx,
 	if (!set.engines)
 		return -ENOMEM;
 
-	set.engines->ctx = ctx;
-
 	for (n = 0; n < num_engines; n++) {
 		struct i915_engine_class_instance ci;
 		struct intel_engine_cs *engine;
@@ -1793,6 +1799,11 @@ set_engines(struct i915_gem_context *ctx,
 
 replace:
 	mutex_lock(&ctx->engines_mutex);
+	if (i915_gem_context_is_closed(ctx)) {
+		mutex_unlock(&ctx->engines_mutex);
+		free_engines(set.engines);
+		return -ENOENT;
+	}
 	if (args->size)
 		i915_gem_context_set_user_engines(ctx);
 	else
@@ -1801,7 +1812,7 @@ set_engines(struct i915_gem_context *ctx,
 	mutex_unlock(&ctx->engines_mutex);
 
 	/* Keep track of old engine sets for kill_context() */
-	engines_idle_release(set.engines);
+	engines_idle_release(ctx, set.engines);
 
 	return 0;
 }
@@ -2077,8 +2088,6 @@ static int clone_engines(struct i915_gem_context *dst,
 	if (!clone)
 		goto err_unlock;
 
-	clone->ctx = dst;
-
 	for (n = 0; n < e->num_engines; n++) {
 		struct intel_engine_cs *engine;
 
@@ -2121,8 +2130,7 @@ static int clone_engines(struct i915_gem_context *dst,
 	i915_gem_context_unlock_engines(src);
 
 	/* Serialised by constructor */
-	free_engines(__context_engines_static(dst));
-	RCU_INIT_POINTER(dst->engines, clone);
+	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
 	if (user_engines)
 		i915_gem_context_set_user_engines(dst);
 	else
@@ -2553,6 +2561,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
 	const struct i915_gem_engines *e = it->engines;
 	struct intel_context *ctx;
 
+	if (unlikely(!e))
+		return NULL;
+
 	do {
 		if (it->idx >= e->num_engines)
 			return NULL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 3ae61a355d87..57b7ae2893e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -207,7 +207,6 @@ static inline void
 i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
 			   struct i915_gem_engines *engines)
 {
-	GEM_BUG_ON(!engines);
 	it->engines = engines;
 	it->idx = 0;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index b12ea1daa29d..e7e3c620f542 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -23,6 +23,9 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
+	spin_lock_init(&ctx->stale.lock);
+	INIT_LIST_HEAD(&ctx->stale.engines);
+
 	i915_gem_context_set_persistence(ctx);
 
 	mutex_init(&ctx->engines_mutex);
-- 
2.25.1

* [Intel-gfx] [PATCH 02/22] drm/i915/gt: Prevent allocation on a banned context
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx; +Cc: Matthew Auld

If a context is banned even before we submit our first request to it,
report the failure before we attempt to allocate any resources for the
context.
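
Schematically (a userspace sketch, not the driver code), the ordering the
patch enforces is simply "check the cheap banned flag before paying for
the allocation":

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

struct ctx_state { bool banned; void *state; };

static int ctx_alloc_state(struct ctx_state *ce)
{
	if (ce->state)
		return 0;		/* already allocated, nothing to do */

	if (ce->banned)
		return -EIO;		/* report failure before allocating */

	ce->state = calloc(1, 4096);	/* only reached for live contexts */
	return ce->state ? 0 : -ENOMEM;
}

int main(void)
{
	struct ctx_state ce = { .banned = true };

	return ctx_alloc_state(&ce) == -EIO ? 0 : 1;
}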

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 8bb444cda14f..01474d3a558b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -51,6 +51,11 @@ int intel_context_alloc_state(struct intel_context *ce)
 		return -EINTR;
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		if (intel_context_is_banned(ce)) {
+			err = -EIO;
+			goto unlock;
+		}
+
 		err = ce->ops->alloc(ce);
 		if (unlikely(err))
 			goto unlock;
-- 
2.25.1

* [Intel-gfx] [PATCH 03/22] drm/i915/gem: Check that the context wasn't closed during setup
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx; +Cc: Matthew Auld

As setup takes a long time, the user may close the context during the
construction of the execbuf. In order to make sure we correctly track
all outstanding work with non-persistent contexts, we need to serialise
the submission with the context closure and mop up any leaks.
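
A userspace sketch of the handshake (illustrative names only; the real
check, shown below, is on eb.context->gem_context after eb_submit()):

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>

struct request { bool skipped; int error; };
struct gem_ctx { _Atomic bool open; };

/* close() clears ->open first; a submit that finishes construction
 * afterwards notices the race and cancels its own request. */
static int submit(struct gem_ctx *ctx, struct request *rq)
{
	/* ... build and queue the request ... */

	if (!atomic_load(&ctx->open)) {
		rq->skipped = true;	/* mop up: don't let the work leak */
		rq->error = -ENOENT;
		return -ENOENT;
	}
	return 0;
}

int main(void)
{
	struct gem_ctx ctx = { false };	/* already closed */
	struct request rq = { 0 };

	return submit(&ctx, &rq) == -ENOENT ? 0 : 1;
}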

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index ac0e5fc5675e..b2a3653b9d5d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2775,9 +2775,18 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb);
+
+	/* Check that the context wasn't destroyed before submission */
+	if (!rcu_access_pointer(eb.context->gem_context)) {
+		err = -ENOENT; /* serious error, supercede any earlier ones */
+		goto err_request;
+	}
+
 err_request:
 	add_to_client(eb.request, file);
 	i915_request_get(eb.request);
+	if (err)
+		i915_request_skip(eb.request, err);
 	i915_request_add(eb.request);
 
 	if (fences)
-- 
2.25.1

* [Intel-gfx] [PATCH 04/22] drm/i915: Drop inspection of execbuf flags during evict
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx

With the goal of removing the serialisation from around execbuf, we will
no longer have the privilege of there being a single execbuf in flight
at any time and so will only be able to inspect the user's flags within
the carefully controlled execbuf context. i915_gem_evict_for_node() is
the only user outside of execbuf that currently peeks at the flag to
convert an overlapping softpinned request from ENOSPC to EINVAL. Retract
this nicety and only report ENOSPC if the location is in current use,
either due to this execbuf or another.
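
Condensed, the decision in i915_gem_evict_for_node() after this patch
looks like the sketch below (a userspace model; the actual hunk is in the
diff): pinned vmas always yield ENOSPC, and a PIN_NONBLOCK caller
additionally refuses to wait for active ones.

#include <errno.h>
#include <stdbool.h>

#define PIN_NONBLOCK	(1u << 0)	/* illustrative value only */

static int can_reuse_range(bool pinned, bool active, unsigned int flags)
{
	if (pinned)
		return -ENOSPC;		/* in current use, ours or another's */

	if (flags & PIN_NONBLOCK && active)
		return -ENOSPC;		/* caller will not wait for idle */

	return 0;			/* safe to evict and take the range */
}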

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 16 ++++++++--------
 drivers/gpu/drm/i915/i915_gem_evict.c          | 15 ++++++---------
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b2a3653b9d5d..2cc236c28333 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -562,14 +562,13 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 }
 
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
-			  struct i915_vma *vma)
+			  struct i915_vma *vma,
+			  u64 pin_flags)
 {
 	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	unsigned int exec_flags = *vma->exec_flags;
-	u64 pin_flags;
 	int err;
 
-	pin_flags = PIN_USER | PIN_NONBLOCK;
 	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
 		pin_flags |= PIN_GLOBAL;
 
@@ -583,12 +582,10 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
 		pin_flags |= PIN_MAPPABLE;
 
-	if (exec_flags & EXEC_OBJECT_PINNED) {
+	if (exec_flags & EXEC_OBJECT_PINNED)
 		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
-		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
-	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+	else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
 		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
-	}
 
 	err = i915_vma_pin(vma,
 			   entry->pad_to_size, entry->alignment,
@@ -621,6 +618,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 static int eb_reserve(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
+	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
 	struct list_head last;
 	struct i915_vma *vma;
 	unsigned int i, pass;
@@ -644,7 +642,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	err = 0;
 	do {
 		list_for_each_entry(vma, &eb->unbound, exec_link) {
-			err = eb_reserve_vma(eb, vma);
+			err = eb_reserve_vma(eb, vma, pin_flags);
 			if (err)
 				break;
 		}
@@ -694,6 +692,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		default:
 			return -ENOSPC;
 		}
+
+		pin_flags = PIN_USER;
 	} while (1);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 1f53cac7271b..4518b9b35c3d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -290,7 +290,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		GEM_BUG_ON(!drm_mm_node_allocated(node));
 		vma = container_of(node, typeof(*vma), node);
 
-		/* If we are using coloring to insert guard pages between
+		/*
+		 * If we are using coloring to insert guard pages between
 		 * different cache domains within the address space, we have
 		 * to check whether the objects on either side of our range
 		 * abutt and conflict. If they are in conflict, then we evict
@@ -307,22 +308,18 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 			}
 		}
 
-		if (flags & PIN_NONBLOCK &&
-		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) {
+		if (i915_vma_is_pinned(vma)) {
 			ret = -ENOSPC;
 			break;
 		}
 
-		/* Overlap of objects in the same batch? */
-		if (i915_vma_is_pinned(vma)) {
+		if (flags & PIN_NONBLOCK && i915_vma_is_active(vma)) {
 			ret = -ENOSPC;
-			if (vma->exec_flags &&
-			    *vma->exec_flags & EXEC_OBJECT_PINNED)
-				ret = -EINVAL;
 			break;
 		}
 
-		/* Never show fear in the face of dragons!
+		/*
+		 * Never show fear in the face of dragons!
 		 *
 		 * We cannot directly remove this node from within this
 		 * iterator and as with i915_gem_evict_something() we employ
-- 
2.25.1

* [Intel-gfx] [PATCH 05/22] drm/i915/gem: Extract transient execbuf flags from i915_vma
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx

For our convenience, and to avoid frequent allocations, we placed some
lists we use for execbuf inside the common i915_vma struct. As we look
to parallelise execbuf, such fields guarded by the struct_mutex BKL must
be pulled under local control. Instead of using the i915_vma as our
primary means of tracking the user's list of objects and their virtual
mappings, we use a local eb_vma with the same lists as before (just now
local not global).

This should allow us to perform the lookup of the vma used for execution
only once during the execbuf ioctl, as currently we need to remove our
secrets from inside i915_vma every time we drop the struct_mutex, since
another execbuf may use the shared locations.

One potential user-visible consequence is that we can remove the
requirement that the execobj[] be unique, and only require that they do
not conflict (i.e. you cannot softpin the same object into two locations).
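
In miniature (a userspace sketch, not the driver structs), the change
moves per-call bookkeeping out of the shared object and into a wrapper
owned by each execbuf, so two concurrent callers no longer scribble over
the same fields:

/* Before: per-execbuf state embedded in the shared, long-lived object. */
struct shared_vma_before {
	unsigned long start, size;	/* long-lived mapping state */
	unsigned int *exec_flags;	/* one caller at a time, by BKL only */
};

/* After: the shared object keeps only long-lived state ... */
struct shared_vma {
	unsigned long start, size;
};

/* ... and each execbuf owns its own wrapper (analogue of struct eb_vma). */
struct local_vma {
	struct shared_vma *vma;		/* reference to the shared object */
	unsigned int flags;		/* this call's EXEC_OBJECT_* flags */
	struct local_vma *bind_next;	/* this call's private bind list */
};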

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 277 +++++++++---------
 drivers/gpu/drm/i915/i915_vma_types.h         |  11 -
 2 files changed, 131 insertions(+), 157 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 2cc236c28333..9c888561a636 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -27,6 +27,19 @@
 #include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 
+struct eb_vma {
+	struct i915_vma *vma;
+	unsigned int flags;
+
+	/** This vma's place in the execbuf reservation list */
+	struct drm_i915_gem_exec_object2 *exec;
+	struct list_head bind_link;
+	struct list_head reloc_link;
+
+	struct hlist_node node;
+	u32 handle;
+};
+
 enum {
 	FORCE_CPU_RELOC = 1,
 	FORCE_GTT_RELOC,
@@ -219,15 +232,14 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
-	struct i915_vma **vma;
-	unsigned int *flags;
+	struct eb_vma *vma;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
 
 	struct i915_request *request; /** our request to build */
-	struct i915_vma *batch; /** identity of the batch obj/vma */
+	struct eb_vma *batch; /** identity of the batch obj/vma */
 	struct i915_vma *trampoline; /** trampoline used for chaining */
 
 	/** actual size of execobj[] as we may extend it for the cmdparser */
@@ -275,8 +287,6 @@ struct i915_execbuffer {
 	struct hlist_head *buckets; /** ht for relocation handles */
 };
 
-#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
-
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
 	return intel_engine_requires_cmd_parser(eb->engine) ||
@@ -363,9 +373,9 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 static inline bool
 eb_pin_vma(struct i915_execbuffer *eb,
 	   const struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+	   struct eb_vma *ev)
 {
-	unsigned int exec_flags = *vma->exec_flags;
+	struct i915_vma *vma = ev->vma;
 	u64 pin_flags;
 
 	if (vma->node.size)
@@ -374,24 +384,24 @@ eb_pin_vma(struct i915_execbuffer *eb,
 		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
 	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
-	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
 		pin_flags |= PIN_GLOBAL;
 
 	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
 		return false;
 
-	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		if (unlikely(i915_vma_pin_fence(vma))) {
 			i915_vma_unpin(vma);
 			return false;
 		}
 
 		if (vma->fence)
-			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
-	return !eb_vma_misplaced(entry, vma, exec_flags);
+	ev->flags |= __EXEC_OBJECT_HAS_PIN;
+	return !eb_vma_misplaced(entry, vma, ev->flags);
 }
 
 static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
@@ -405,13 +415,13 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 }
 
 static inline void
-eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
+eb_unreserve_vma(struct eb_vma *ev)
 {
-	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
 		return;
 
-	__eb_unreserve_vma(vma, *flags);
-	*flags &= ~__EXEC_OBJECT_RESERVED;
+	__eb_unreserve_vma(ev->vma, ev->flags);
+	ev->flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
 static int
@@ -419,7 +429,6 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		struct drm_i915_gem_exec_object2 *entry,
 		struct i915_vma *vma)
 {
-	struct drm_i915_private *i915 = eb->i915;
 	if (unlikely(entry->flags & eb->invalid_flags))
 		return -EINVAL;
 
@@ -441,14 +450,6 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	} else {
 		entry->pad_to_size = 0;
 	}
-
-	if (unlikely(vma->exec_flags)) {
-		drm_dbg(&i915->drm,
-			"Object [handle %d, index %d] appears more than once in object list\n",
-			entry->handle, (int)(entry - eb->exec));
-		return -EINVAL;
-	}
-
 	/*
 	 * From drm_mm perspective address space is continuous,
 	 * so from this point we're always using non-canonical
@@ -477,6 +478,7 @@ eb_add_vma(struct i915_execbuffer *eb,
 	   struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+	struct eb_vma *ev = &eb->vma[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -487,25 +489,19 @@ eb_add_vma(struct i915_execbuffer *eb,
 			return err;
 	}
 
+	ev->vma = vma;
+	ev->exec = entry;
+	ev->flags = entry->flags;
+
 	if (eb->lut_size > 0) {
-		vma->exec_handle = entry->handle;
-		hlist_add_head(&vma->exec_node,
+		ev->handle = entry->handle;
+		hlist_add_head(&ev->node,
 			       &eb->buckets[hash_32(entry->handle,
 						    eb->lut_size)]);
 	}
 
 	if (entry->relocation_count)
-		list_add_tail(&vma->reloc_link, &eb->relocs);
-
-	/*
-	 * Stash a pointer from the vma to execobj, so we can query its flags,
-	 * size, alignment etc as provided by the user. Also we stash a pointer
-	 * to the vma inside the execobj so that we can use a direct lookup
-	 * to find the right target VMA when doing relocations.
-	 */
-	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags;
-	vma->exec_flags = &eb->flags[i];
+		list_add_tail(&ev->reloc_link, &eb->relocs);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -518,28 +514,26 @@ eb_add_vma(struct i915_execbuffer *eb,
 	 */
 	if (i == batch_idx) {
 		if (entry->relocation_count &&
-		    !(eb->flags[i] & EXEC_OBJECT_PINNED))
-			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
+		    !(ev->flags & EXEC_OBJECT_PINNED))
+			ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 		if (eb->reloc_cache.has_fence)
-			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
+			ev->flags |= EXEC_OBJECT_NEEDS_FENCE;
 
-		eb->batch = vma;
+		eb->batch = ev;
 	}
 
 	err = 0;
-	if (eb_pin_vma(eb, entry, vma)) {
+	if (eb_pin_vma(eb, entry, ev)) {
 		if (entry->offset != vma->node.start) {
 			entry->offset = vma->node.start | UPDATE;
 			eb->args->flags |= __EXEC_HAS_RELOC;
 		}
 	} else {
-		eb_unreserve_vma(vma, vma->exec_flags);
+		eb_unreserve_vma(ev);
 
-		list_add_tail(&vma->exec_link, &eb->unbound);
+		list_add_tail(&ev->bind_link, &eb->unbound);
 		if (drm_mm_node_allocated(&vma->node))
 			err = i915_vma_unbind(vma);
-		if (unlikely(err))
-			vma->exec_flags = NULL;
 	}
 	return err;
 }
@@ -562,11 +556,12 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 }
 
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
-			  struct i915_vma *vma,
+			  struct eb_vma *ev,
 			  u64 pin_flags)
 {
-	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
-	unsigned int exec_flags = *vma->exec_flags;
+	struct drm_i915_gem_exec_object2 *entry = ev->exec;
+	unsigned int exec_flags = ev->flags;
+	struct i915_vma *vma = ev->vma;
 	int err;
 
 	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
@@ -609,8 +604,8 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
-	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
+	ev->flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
 
 	return 0;
 }
@@ -620,7 +615,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	const unsigned int count = eb->buffer_count;
 	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
 	struct list_head last;
-	struct i915_vma *vma;
+	struct eb_vma *ev;
 	unsigned int i, pass;
 	int err;
 
@@ -641,8 +636,8 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	pass = 0;
 	err = 0;
 	do {
-		list_for_each_entry(vma, &eb->unbound, exec_link) {
-			err = eb_reserve_vma(eb, vma, pin_flags);
+		list_for_each_entry(ev, &eb->unbound, bind_link) {
+			err = eb_reserve_vma(eb, ev, pin_flags);
 			if (err)
 				break;
 		}
@@ -653,26 +648,27 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		INIT_LIST_HEAD(&eb->unbound);
 		INIT_LIST_HEAD(&last);
 		for (i = 0; i < count; i++) {
-			unsigned int flags = eb->flags[i];
-			struct i915_vma *vma = eb->vma[i];
+			unsigned int flags;
 
+			ev = &eb->vma[i];
+			flags = ev->flags;
 			if (flags & EXEC_OBJECT_PINNED &&
 			    flags & __EXEC_OBJECT_HAS_PIN)
 				continue;
 
-			eb_unreserve_vma(vma, &eb->flags[i]);
+			eb_unreserve_vma(ev);
 
 			if (flags & EXEC_OBJECT_PINNED)
 				/* Pinned must have their slot */
-				list_add(&vma->exec_link, &eb->unbound);
+				list_add(&ev->bind_link, &eb->unbound);
 			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 				/* Map require the lowest 256MiB (aperture) */
-				list_add_tail(&vma->exec_link, &eb->unbound);
+				list_add_tail(&ev->bind_link, &eb->unbound);
 			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
 				/* Prioritise 4GiB region for restricted bo */
-				list_add(&vma->exec_link, &last);
+				list_add(&ev->bind_link, &last);
 			else
-				list_add_tail(&vma->exec_link, &last);
+				list_add_tail(&ev->bind_link, &last);
 		}
 		list_splice_tail(&last, &eb->unbound);
 
@@ -790,10 +786,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		if (unlikely(err))
 			goto err_vma;
 
-		GEM_BUG_ON(vma != eb->vma[i]);
-		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
-			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
+			   eb_vma_misplaced(&eb->exec[i], vma, eb->vma[i].flags));
 	}
 
 	mutex_unlock(&eb->gem_context->mutex);
@@ -804,27 +798,27 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 err_obj:
 	i915_gem_object_put(obj);
 err_vma:
-	eb->vma[i] = NULL;
+	eb->vma[i].vma = NULL;
 err_ctx:
 	mutex_unlock(&eb->gem_context->mutex);
 	return err;
 }
 
-static struct i915_vma *
+static struct eb_vma *
 eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 {
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return eb->vma[handle];
+		return &eb->vma[handle];
 	} else {
 		struct hlist_head *head;
-		struct i915_vma *vma;
+		struct eb_vma *ev;
 
 		head = &eb->buckets[hash_32(handle, eb->lut_size)];
-		hlist_for_each_entry(vma, head, exec_node) {
-			if (vma->exec_handle == handle)
-				return vma;
+		hlist_for_each_entry(ev, head, node) {
+			if (ev->handle == handle)
+				return ev;
 		}
 		return NULL;
 	}
@@ -836,20 +830,18 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;
 
 	for (i = 0; i < count; i++) {
-		struct i915_vma *vma = eb->vma[i];
-		unsigned int flags = eb->flags[i];
+		struct eb_vma *ev = &eb->vma[i];
+		struct i915_vma *vma = ev->vma;
 
 		if (!vma)
 			break;
 
-		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
-		vma->exec_flags = NULL;
-		eb->vma[i] = NULL;
+		eb->vma[i].vma = NULL;
 
-		if (flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, flags);
+		if (ev->flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, ev->flags);
 
-		if (flags & __EXEC_OBJECT_HAS_REF)
+		if (ev->flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
 	}
 }
@@ -1328,11 +1320,11 @@ relocate_entry(struct i915_vma *vma,
 
 static u64
 eb_relocate_entry(struct i915_execbuffer *eb,
-		  struct i915_vma *vma,
+		  struct eb_vma *ev,
 		  const struct drm_i915_gem_relocation_entry *reloc)
 {
 	struct drm_i915_private *i915 = eb->i915;
-	struct i915_vma *target;
+	struct eb_vma *target;
 	int err;
 
 	/* we've already hold a reference to all valid objects */
@@ -1364,7 +1356,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}
 
 	if (reloc->write_domain) {
-		*target->exec_flags |= EXEC_OBJECT_WRITE;
+		target->flags |= EXEC_OBJECT_WRITE;
 
 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1374,7 +1366,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 		 */
 		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 		    IS_GEN(eb->i915, 6)) {
-			err = i915_vma_bind(target, target->obj->cache_level,
+			err = i915_vma_bind(target->vma,
+					    target->vma->obj->cache_level,
 					    PIN_GLOBAL, NULL);
 			if (WARN_ONCE(err,
 				      "Unexpected failure to bind target VMA!"))
@@ -1387,17 +1380,17 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * more work needs to be done.
 	 */
 	if (!DBG_FORCE_RELOC &&
-	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
+	    gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
 	if (unlikely(reloc->offset >
-		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
+		     ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
 		drm_dbg(&i915->drm, "Relocation beyond object bounds: "
 			  "target %d offset %d size %d.\n",
 			  reloc->target_handle,
 			  (int)reloc->offset,
-			  (int)vma->size);
+			  (int)ev->vma->size);
 		return -EINVAL;
 	}
 	if (unlikely(reloc->offset & 3)) {
@@ -1416,18 +1409,18 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * do relocations we are already stalling, disable the user's opt
 	 * out of our synchronisation.
 	 */
-	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
+	ev->flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
-	return relocate_entry(vma, reloc, eb, target);
+	return relocate_entry(ev->vma, reloc, eb, target->vma);
 }
 
-static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
+static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
 {
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	const struct drm_i915_gem_exec_object2 *entry = ev->exec;
 	unsigned int remain;
 
 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1467,7 +1460,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 
 		remain -= count;
 		do {
-			u64 offset = eb_relocate_entry(eb, vma, r);
+			u64 offset = eb_relocate_entry(eb, ev, r);
 
 			if (likely(offset == 0)) {
 			} else if ((s64)offset < 0) {
@@ -1510,16 +1503,16 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 }
 
 static int
-eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
 {
-	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	const struct drm_i915_gem_exec_object2 *entry = ev->exec;
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
 	int err;
 
 	for (i = 0; i < entry->relocation_count; i++) {
-		u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
+		u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
 
 		if ((s64)offset < 0) {
 			err = (int)offset;
@@ -1660,7 +1653,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 {
 	struct drm_device *dev = &eb->i915->drm;
 	bool have_copy = false;
-	struct i915_vma *vma;
+	struct eb_vma *ev;
 	int err = 0;
 
 repeat:
@@ -1716,15 +1709,15 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 
 	GEM_BUG_ON(!eb->batch);
 
-	list_for_each_entry(vma, &eb->relocs, reloc_link) {
+	list_for_each_entry(ev, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
-			err = eb_relocate_vma(eb, vma);
+			err = eb_relocate_vma(eb, ev);
 			pagefault_enable();
 			if (err)
 				goto repeat;
 		} else {
-			err = eb_relocate_vma_slow(eb, vma);
+			err = eb_relocate_vma_slow(eb, ev);
 			if (err)
 				goto err;
 		}
@@ -1769,10 +1762,10 @@ static int eb_relocate(struct i915_execbuffer *eb)
 
 	/* The objects are in their final locations, apply the relocations. */
 	if (eb->args->flags & __EXEC_HAS_RELOC) {
-		struct i915_vma *vma;
+		struct eb_vma *ev;
 
-		list_for_each_entry(vma, &eb->relocs, reloc_link) {
-			if (eb_relocate_vma(eb, vma))
+		list_for_each_entry(ev, &eb->relocs, reloc_link) {
+			if (eb_relocate_vma(eb, ev))
 				goto slow;
 		}
 	}
@@ -1793,27 +1786,19 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	ww_acquire_init(&acquire, &reservation_ww_class);
 
 	for (i = 0; i < count; i++) {
-		struct i915_vma *vma = eb->vma[i];
+		struct eb_vma *ev = &eb->vma[i];
+		struct i915_vma *vma = ev->vma;
 
 		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
-		if (!err)
-			continue;
-
-		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
-
 		if (err == -EDEADLK) {
 			GEM_BUG_ON(i == 0);
 			do {
 				int j = i - 1;
 
-				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+				ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
 
-				swap(eb->flags[i], eb->flags[j]);
 				swap(eb->vma[i],  eb->vma[j]);
-				eb->vma[i]->exec_flags = &eb->flags[i];
 			} while (--i);
-			GEM_BUG_ON(vma != eb->vma[0]);
-			vma->exec_flags = &eb->flags[0];
 
 			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
 							       &acquire);
@@ -1824,8 +1809,9 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	ww_acquire_done(&acquire);
 
 	while (i--) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
+		struct eb_vma *ev = &eb->vma[i];
+		struct i915_vma *vma = ev->vma;
+		unsigned int flags = ev->flags;
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		assert_vma_held(vma);
@@ -1869,10 +1855,10 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		i915_vma_unlock(vma);
 
 		__eb_unreserve_vma(vma, flags);
-		vma->exec_flags = NULL;
-
 		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
+
+		ev->vma = NULL;
 	}
 	ww_acquire_fini(&acquire);
 
@@ -2007,7 +1993,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	if (!pw)
 		return -ENOMEM;
 
-	err = i915_active_acquire(&eb->batch->active);
+	err = i915_active_acquire(&eb->batch->vma->active);
 	if (err)
 		goto err_free;
 
@@ -2024,7 +2010,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	dma_fence_work_init(&pw->base, &eb_parse_ops);
 
 	pw->engine = eb->engine;
-	pw->batch = eb->batch;
+	pw->batch = eb->batch->vma;
 	pw->batch_offset = eb->batch_start_offset;
 	pw->batch_length = eb->batch_len;
 	pw->shadow = shadow;
@@ -2066,7 +2052,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 err_shadow:
 	i915_active_release(&shadow->active);
 err_batch:
-	i915_active_release(&eb->batch->active);
+	i915_active_release(&eb->batch->vma->active);
 err_free:
 	kfree(pw);
 	return err;
@@ -2129,15 +2115,13 @@ static int eb_parse(struct i915_execbuffer *eb)
 	if (err)
 		goto err_trampoline;
 
-	eb->vma[eb->buffer_count] = i915_vma_get(shadow);
-	eb->flags[eb->buffer_count] =
+	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
+	eb->vma[eb->buffer_count].flags =
 		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	shadow->exec_flags = &eb->flags[eb->buffer_count];
-	eb->buffer_count++;
+	eb->batch = &eb->vma[eb->buffer_count++];
 
 	eb->trampoline = trampoline;
 	eb->batch_start_offset = 0;
-	eb->batch = shadow;
 
 	shadow->private = pool;
 	return 0;
@@ -2164,7 +2148,7 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static int eb_submit(struct i915_execbuffer *eb)
+static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
 {
 	int err;
 
@@ -2191,7 +2175,7 @@ static int eb_submit(struct i915_execbuffer *eb)
 	}
 
 	err = eb->engine->emit_bb_start(eb->request,
-					eb->batch->node.start +
+					batch->node.start +
 					eb->batch_start_offset,
 					eb->batch_len,
 					eb->batch_flags);
@@ -2578,6 +2562,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	struct dma_fence *in_fence = NULL;
 	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	struct i915_vma *batch;
 	int out_fence_fd = -1;
 	int err;
 
@@ -2592,9 +2577,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 
 	eb.exec = exec;
-	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
-	eb.vma[0] = NULL;
-	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+	eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
+	eb.vma[0].vma = NULL;
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2679,21 +2663,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
-	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
 		drm_dbg(&i915->drm,
 			"Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
 	}
-	if (eb.batch_start_offset > eb.batch->size ||
-	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
+
+	if (range_overflows_t(u64,
+			      eb.batch_start_offset, eb.batch_len,
+			      eb.batch->vma->size)) {
 		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
 		err = -EINVAL;
 		goto err_vma;
 	}
 
 	if (eb.batch_len == 0)
-		eb.batch_len = eb.batch->size - eb.batch_start_offset;
+		eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
 
 	err = eb_parse(&eb);
 	if (err)
@@ -2703,6 +2689,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
+	batch = eb.batch->vma;
 	if (eb.batch_flags & I915_DISPATCH_SECURE) {
 		struct i915_vma *vma;
 
@@ -2716,13 +2703,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		 *   fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
+		vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
 			goto err_parse;
 		}
 
-		eb.batch = vma;
+		batch = vma;
 	}
 
 	/* All GPU relocation batches must be submitted prior to the user rq */
@@ -2769,12 +2756,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	eb.request->batch = eb.batch;
-	if (eb.batch->private)
-		intel_engine_pool_mark_active(eb.batch->private, eb.request);
+	eb.request->batch = batch;
+	if (batch->private)
+		intel_engine_pool_mark_active(batch->private, eb.request);
 
 	trace_i915_request_queue(eb.request, eb.batch_flags);
-	err = eb_submit(&eb);
+	err = eb_submit(&eb, batch);
 
 	/* Check that the context wasn't destroyed before submission */
 	if (!rcu_access_pointer(eb.context->gem_context)) {
@@ -2806,10 +2793,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 err_batch_unpin:
 	if (eb.batch_flags & I915_DISPATCH_SECURE)
-		i915_vma_unpin(eb.batch);
+		i915_vma_unpin(batch);
 err_parse:
-	if (eb.batch->private)
-		intel_engine_pool_put(eb.batch->private);
+	if (batch->private)
+		intel_engine_pool_put(batch->private);
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
@@ -2834,9 +2821,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 static size_t eb_element_size(void)
 {
-	return (sizeof(struct drm_i915_gem_exec_object2) +
-		sizeof(struct i915_vma *) +
-		sizeof(unsigned int));
+	return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
 }
 
 static bool check_buffer_count(size_t count)
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index e0942efd5236..63831cdb7402 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -273,21 +273,10 @@ struct i915_vma {
 	struct rb_node obj_node;
 	struct hlist_node obj_hash;
 
-	/** This vma's place in the execbuf reservation list */
-	struct list_head exec_link;
-	struct list_head reloc_link;
-
 	/** This vma's place in the eviction list */
 	struct list_head evict_link;
 
 	struct list_head closed_link;
-
-	/**
-	 * Used for performing relocations during execbuffer insertion.
-	 */
-	unsigned int *exec_flags;
-	struct hlist_node exec_node;
-	u32 exec_handle;
 };
 
 #endif
-- 
2.25.1

* [Intel-gfx] [PATCH 06/22] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx

As we no longer stash anything inside i915_vma under the exclusive
protection of struct_mutex, we do not need to revoke the i915_vma
stashes before dropping struct_mutex to handle pagefaults. Knowing that
we must drop the struct_mutex while keeping the eb->vma around means
that we are required to hold onto the object reference until we have
marked the vma as active.
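
The reference rule in that last sentence, as a userspace sketch (plain
refcount and pthreads, illustrative only, not the driver code):

#include <pthread.h>
#include <stdatomic.h>

struct obj { atomic_int refcount; };

static struct obj *obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
	return o;
}

static void obj_put(struct obj *o)
{
	atomic_fetch_sub(&o->refcount, 1);	/* freed when it hits zero */
}

static void bind_with_possible_faults(pthread_mutex_t *lock, struct obj *o)
{
	struct obj *ref = obj_get(o);	/* our reference outlives the lock */

	pthread_mutex_unlock(lock);	/* e.g. to service a page fault */
	/* ... o cannot be freed here, even if its owner closes it ... */
	pthread_mutex_lock(lock);

	/* only drop our reference once the request/vma tracks the object */
	obj_put(ref);
}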

Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 107 +++++++-----------
 1 file changed, 42 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9c888561a636..577c84872acc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -47,17 +47,15 @@ enum {
 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
 };
 
-#define __EXEC_OBJECT_HAS_REF		BIT(31)
-#define __EXEC_OBJECT_HAS_PIN		BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_HAS_PIN		BIT(31)
+#define __EXEC_OBJECT_HAS_FENCE		BIT(30)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(28)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 28) /* all of the above */
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
-#define __EXEC_VALIDATED	BIT(30)
-#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
+#define __EXEC_INTERNAL_FLAGS	(~0u << 31)
 #define UPDATE			PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -472,24 +470,17 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	return 0;
 }
 
-static int
+static void
 eb_add_vma(struct i915_execbuffer *eb,
 	   unsigned int i, unsigned batch_idx,
 	   struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	struct eb_vma *ev = &eb->vma[i];
-	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 
-	if (!(eb->args->flags & __EXEC_VALIDATED)) {
-		err = eb_validate_vma(eb, entry, vma);
-		if (unlikely(err))
-			return err;
-	}
-
-	ev->vma = vma;
+	ev->vma = i915_vma_get(vma);
 	ev->exec = entry;
 	ev->flags = entry->flags;
 
@@ -522,7 +513,6 @@ eb_add_vma(struct i915_execbuffer *eb,
 		eb->batch = ev;
 	}
 
-	err = 0;
 	if (eb_pin_vma(eb, entry, ev)) {
 		if (entry->offset != vma->node.start) {
 			entry->offset = vma->node.start | UPDATE;
@@ -530,12 +520,8 @@ eb_add_vma(struct i915_execbuffer *eb,
 		}
 	} else {
 		eb_unreserve_vma(ev);
-
 		list_add_tail(&ev->bind_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
-			err = i915_vma_unbind(vma);
 	}
-	return err;
 }
 
 static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -582,6 +568,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 	else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
 		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 
+	if (drm_mm_node_allocated(&vma->node) &&
+	    eb_vma_misplaced(entry, vma, ev->flags)) {
+		err = i915_vma_unbind(vma);
+		if (err)
+			return err;
+	}
+
 	err = i915_vma_pin(vma,
 			   entry->pad_to_size, entry->alignment,
 			   pin_flags);
@@ -641,7 +634,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
 			if (err)
 				break;
 		}
-		if (err != -ENOSPC)
+		if (!(err == -ENOSPC || err == -EAGAIN))
 			return err;
 
 		/* Resort *all* the objects into priority order */
@@ -672,6 +665,11 @@ static int eb_reserve(struct i915_execbuffer *eb)
 		}
 		list_splice_tail(&last, &eb->unbound);
 
+		if (err == -EAGAIN) {
+			flush_workqueue(eb->i915->mm.userptr_wq);
+			continue;
+		}
+
 		switch (pass++) {
 		case 0:
 			break;
@@ -727,17 +725,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i, batch;
 	int err;
 
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
+		return -ENOENT;
+
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
 	batch = eb_batch_index(eb);
 
-	mutex_lock(&eb->gem_context->mutex);
-	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
-		err = -ENOENT;
-		goto err_ctx;
-	}
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -782,25 +777,19 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		i915_gem_object_unlock(obj);
 
 add_vma:
-		err = eb_add_vma(eb, i, batch, vma);
+		err = eb_validate_vma(eb, &eb->exec[i], vma);
 		if (unlikely(err))
 			goto err_vma;
 
-		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
-			   eb_vma_misplaced(&eb->exec[i], vma, eb->vma[i].flags));
+		eb_add_vma(eb, i, batch, vma);
 	}
 
-	mutex_unlock(&eb->gem_context->mutex);
-
-	eb->args->flags |= __EXEC_VALIDATED;
-	return eb_reserve(eb);
+	return 0;
 
 err_obj:
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i].vma = NULL;
-err_ctx:
-	mutex_unlock(&eb->gem_context->mutex);
 	return err;
 }
 
@@ -841,19 +830,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		if (ev->flags & __EXEC_OBJECT_HAS_PIN)
 			__eb_unreserve_vma(vma, ev->flags);
 
-		if (ev->flags & __EXEC_OBJECT_HAS_REF)
-			i915_vma_put(vma);
+		i915_vma_put(vma);
 	}
 }
 
-static void eb_reset_vmas(const struct i915_execbuffer *eb)
-{
-	eb_release_vmas(eb);
-	if (eb->lut_size > 0)
-		memset(eb->buckets, 0,
-		       sizeof(struct hlist_head) << eb->lut_size);
-}
-
 static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
@@ -1662,8 +1642,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 		goto out;
 	}
 
-	/* We may process another execbuffer during the unlock... */
-	eb_reset_vmas(eb);
 	mutex_unlock(&dev->struct_mutex);
 
 	/*
@@ -1702,11 +1680,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 		goto out;
 	}
 
-	/* reacquire the objects */
-	err = eb_lookup_vmas(eb);
-	if (err)
-		goto err;
-
 	GEM_BUG_ON(!eb->batch);
 
 	list_for_each_entry(ev, &eb->relocs, reloc_link) {
@@ -1757,8 +1730,17 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 
 static int eb_relocate(struct i915_execbuffer *eb)
 {
-	if (eb_lookup_vmas(eb))
-		goto slow;
+	int err;
+
+	mutex_lock(&eb->gem_context->mutex);
+	err = eb_lookup_vmas(eb);
+	mutex_unlock(&eb->gem_context->mutex);
+	if (err)
+		return err;
+
+	err = eb_reserve(eb);
+	if (err)
+		return err;
 
 	/* The objects are in their final locations, apply the relocations. */
 	if (eb->args->flags & __EXEC_HAS_RELOC) {
@@ -1766,14 +1748,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
 
 		list_for_each_entry(ev, &eb->relocs, reloc_link) {
 			if (eb_relocate_vma(eb, ev))
-				goto slow;
+				return eb_relocate_slow(eb);
 		}
 	}
 
 	return 0;
-
-slow:
-	return eb_relocate_slow(eb);
 }
 
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -1855,8 +1834,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		i915_vma_unlock(vma);
 
 		__eb_unreserve_vma(vma, flags);
-		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
-			i915_vma_put(vma);
+		i915_vma_put(vma);
 
 		ev->vma = NULL;
 	}
@@ -2116,8 +2094,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 		goto err_trampoline;
 
 	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
-	eb->vma[eb->buffer_count].flags =
-		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
 	eb->batch = &eb->vma[eb->buffer_count++];
 
 	eb->trampoline = trampoline;
-- 
2.25.1

* [Intel-gfx] [PATCH 07/22] drm/i915/perf: Reintroduce wait on OA configuration completion
From: Chris Wilson @ 2020-03-02  8:57 UTC
  To: intel-gfx

We still need to wait for the initial OA configuration to happen
before we enable OA report writes to the OA buffer.
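
The ordering being restored, as a userspace model (a pthread condition
variable standing in for i915_request_wait(); illustrative only, the
driver version is i915_perf_stream_enable_sync() in the diff below):

#include <pthread.h>
#include <stdbool.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool done;
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = true;			/* OA configuration has been written */
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)		/* block until the config request ran */
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

/* enable path: queue the configuration (the worker calls complete()),
 * wait_for_completion(), and only then allow OA report writes. */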

Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 15d0ace1f876 ("drm/i915/perf: execute OA configuration from command stream")
Testcase: igt/perf/stream-open-close
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
This is simply an alternative to storing the request inside the stream.
---
 drivers/gpu/drm/i915/i915_perf.c       | 58 ++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_perf_types.h |  3 +-
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2334c45f1d08..1b074bb4a7fe 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1972,9 +1972,10 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 	return i915_vma_get(oa_bo->vma);
 }
 
-static int emit_oa_config(struct i915_perf_stream *stream,
-			  struct i915_oa_config *oa_config,
-			  struct intel_context *ce)
+static struct i915_request *
+emit_oa_config(struct i915_perf_stream *stream,
+	       struct i915_oa_config *oa_config,
+	       struct intel_context *ce)
 {
 	struct i915_request *rq;
 	struct i915_vma *vma;
@@ -1982,7 +1983,7 @@ static int emit_oa_config(struct i915_perf_stream *stream,
 
 	vma = get_oa_vma(stream, oa_config);
 	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+		return ERR_CAST(vma);
 
 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
@@ -2007,13 +2008,17 @@ static int emit_oa_config(struct i915_perf_stream *stream,
 	err = rq->engine->emit_bb_start(rq,
 					vma->node.start, 0,
 					I915_DISPATCH_SECURE);
+	if (err)
+		goto err_add_request;
+
+	i915_request_get(rq);
 err_add_request:
 	i915_request_add(rq);
 err_vma_unpin:
 	i915_vma_unpin(vma);
 err_vma_put:
 	i915_vma_put(vma);
-	return err;
+	return err ? ERR_PTR(err) : rq;
 }
 
 static struct intel_context *oa_context(struct i915_perf_stream *stream)
@@ -2021,7 +2026,8 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream)
 	return stream->pinned_ctx ?: stream->engine->kernel_context;
 }
 
-static int hsw_enable_metric_set(struct i915_perf_stream *stream)
+static struct i915_request *
+hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
 
@@ -2426,7 +2432,8 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
 }
 
-static int gen8_enable_metric_set(struct i915_perf_stream *stream)
+static struct i915_request *
+gen8_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
@@ -2468,7 +2475,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 	 */
 	ret = lrc_configure_all_contexts(stream, oa_config);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
 	return emit_oa_config(stream, oa_config, oa_context(stream));
 }
@@ -2480,7 +2487,8 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
 			     0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
 }
 
-static int gen12_enable_metric_set(struct i915_perf_stream *stream)
+static struct i915_request *
+gen12_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
@@ -2511,7 +2519,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 */
 	ret = gen12_configure_all_contexts(stream, oa_config);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
 	/*
 	 * For Gen12, performance counters are context
@@ -2521,7 +2529,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	if (stream->ctx) {
 		ret = gen12_configure_oar_context(stream, true);
 		if (ret)
-			return ret;
+			return ERR_PTR(ret);
 	}
 
 	return emit_oa_config(stream, oa_config, oa_context(stream));
@@ -2719,6 +2727,20 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
 	.read = i915_oa_read,
 };
 
+static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
+{
+	struct i915_request *rq;
+
+	rq = stream->perf->ops.enable_metric_set(stream);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(rq);
+
+	return 0;
+}
+
 /**
  * i915_oa_stream_init - validate combined props for OA stream and init
  * @stream: An i915 perf stream
@@ -2853,7 +2875,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	stream->ops = &i915_oa_stream_ops;
 	WRITE_ONCE(perf->exclusive_stream, stream);
 
-	ret = perf->ops.enable_metric_set(stream);
+	ret = i915_perf_stream_enable_sync(stream);
 	if (ret) {
 		DRM_DEBUG("Unable to enable metric set\n");
 		goto err_enable;
@@ -3170,7 +3192,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		return -EINVAL;
 
 	if (config != stream->oa_config) {
-		int err;
+		struct i915_request *rq;
 
 		/*
 		 * If OA is bound to a specific context, emit the
@@ -3181,11 +3203,13 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		 * When set globally, we use a low priority kernel context,
 		 * so it will effectively take effect when idle.
 		 */
-		err = emit_oa_config(stream, config, oa_context(stream));
-		if (err == 0)
+		rq = emit_oa_config(stream, config, oa_context(stream));
+		if (!IS_ERR(rq)) {
 			config = xchg(&stream->oa_config, config);
-		else
-			ret = err;
+			i915_request_put(rq);
+		} else {
+			ret = PTR_ERR(rq);
+		}
 	}
 
 	i915_oa_config_put(config);
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 45e581455f5d..a0e22f00f6cf 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -339,7 +339,8 @@ struct i915_oa_ops {
 	 * counter reports being sampled. May apply system constraints such as
 	 * disabling EU clock gating as required.
 	 */
-	int (*enable_metric_set)(struct i915_perf_stream *stream);
+	struct i915_request *
+		(*enable_metric_set)(struct i915_perf_stream *stream);
 
 	/**
 	 * @disable_metric_set: Remove system constraints associated with using
-- 
2.25.1


* [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (5 preceding siblings ...)
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 07/22] drm/i915/perf: Reintroduce wait on OA configuration completion Chris Wilson
@ 2020-03-02  8:57 ` Chris Wilson
  2020-03-02 16:18   ` Mika Kuoppala
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 09/22] drm/i915: Extend i915_request_await_active to use all timelines Chris Wilson
                   ` (17 subsequent siblings)
  24 siblings, 1 reply; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:57 UTC (permalink / raw)
  To: intel-gfx

For the convenience of callers that just want to use an i915_active to
track a wide array of concurrent timelines, wrap the base i915_active
struct inside a kref. This i915_active will self-destruct after use.
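
As an aside, a caller's lifecycle with the new helpers looks roughly like
the sketch below (rq stands for whichever request is being tracked;
auto_active()/auto_retire() refer to the internal callbacks added here):

	struct i915_active *ref;
	int err;

	ref = i915_active_create();	/* kref starts at 1 */
	if (!ref)
		return -ENOMEM;

	/*
	 * While requests are tracked, auto_active()/auto_retire() hold an
	 * extra reference so the wrapper cannot vanish under its timelines.
	 */
	err = i915_active_add_request(ref, rq);

	i915_active_put(ref);		/* freed via auto_release() once idle */
	return err;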

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c | 53 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_active.h |  4 +++
 2 files changed, 57 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7b3d6c12ad61..1826de14d2da 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -881,6 +881,59 @@ void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
 	active_fence_cb(fence, cb);
 }
 
+struct auto_active {
+	struct i915_active base;
+	struct kref ref;
+};
+
+struct i915_active *i915_active_get(struct i915_active *ref)
+{
+	struct auto_active *aa = container_of(ref, typeof(*aa), base);
+
+	kref_get(&aa->ref);
+	return &aa->base;
+}
+
+static void auto_release(struct kref *ref)
+{
+	struct auto_active *aa = container_of(ref, typeof(*aa), ref);
+
+	i915_active_fini(&aa->base);
+	kfree(aa);
+}
+
+void i915_active_put(struct i915_active *ref)
+{
+	struct auto_active *aa = container_of(ref, typeof(*aa), base);
+
+	kref_put(&aa->ref, auto_release);
+}
+
+static int auto_active(struct i915_active *ref)
+{
+	i915_active_get(ref);
+	return 0;
+}
+
+static void auto_retire(struct i915_active *ref)
+{
+	i915_active_put(ref);
+}
+
+struct i915_active *i915_active_create(void)
+{
+	struct auto_active *aa;
+
+	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
+	if (!aa)
+		return NULL;
+
+	kref_init(&aa->ref);
+	i915_active_init(&aa->base, auto_active, auto_retire);
+
+	return &aa->base;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_active.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 973ff0447c6c..7e438501333e 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -215,4 +215,8 @@ void i915_request_add_active_barriers(struct i915_request *rq);
 void i915_active_print(struct i915_active *ref, struct drm_printer *m);
 void i915_active_unlock_wait(struct i915_active *ref);
 
+struct i915_active *i915_active_create(void);
+struct i915_active *i915_active_get(struct i915_active *ref);
+void i915_active_put(struct i915_active *ref);
+
 #endif /* _I915_ACTIVE_H_ */
-- 
2.25.1


* [Intel-gfx] [PATCH 09/22] drm/i915: Extend i915_request_await_active to use all timelines
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (6 preceding siblings ...)
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct Chris Wilson
@ 2020-03-02  8:57 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 10/22] drm/i915/perf: Schedule oa_config after modifying the contexts Chris Wilson
                   ` (16 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:57 UTC (permalink / raw)
  To: intel-gfx

Extend i915_request_await_active() to be able to asynchronously wait on
all the tracked timelines simultaneously.
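
For example (the vma caller below is the one converted by this patch;
passing 0 keeps the old exclusive-fence-only behaviour):

	/* previous behaviour: only await the exclusive (write) fence */
	err = i915_request_await_active(rq, &vma->active, 0);

	/* new: also await every fence tracked in the i915_active tree */
	err = i915_request_await_active(rq, &vma->active, I915_ACTIVE_AWAIT_ALL);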

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_active.c | 51 +++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_active.h |  5 ++-
 drivers/gpu/drm/i915/i915_vma.c    |  2 +-
 3 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 1826de14d2da..e659688db043 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -518,23 +518,52 @@ int i915_active_wait(struct i915_active *ref)
 	return 0;
 }
 
-int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
+static int await_active(struct i915_request *rq,
+			struct i915_active_fence *active)
+{
+	struct dma_fence *fence;
+
+	if (is_barrier(active))
+		return 0;
+
+	fence = i915_active_fence_get(active);
+	if (fence) {
+		int err;
+
+		err = i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+int i915_request_await_active(struct i915_request *rq,
+			      struct i915_active *ref,
+			      unsigned int flags)
 {
 	int err = 0;
 
+	/* We must always wait for the exclusive fence! */
 	if (rcu_access_pointer(ref->excl.fence)) {
-		struct dma_fence *fence;
-
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = i915_request_await_dma_fence(rq, fence);
-			dma_fence_put(fence);
-		}
+		err = await_active(rq, &ref->excl);
+		if (err)
+			return err;
 	}
 
-	/* In the future we may choose to await on all fences */
+	if (flags & I915_ACTIVE_AWAIT_ALL && i915_active_acquire_if_busy(ref)) {
+		struct active_node *it, *n;
+
+		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+			err = await_active(rq, &it->base);
+			if (err)
+				break;
+		}
+		i915_active_release(ref);
+		if (err)
+			return err;
+	}
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7e438501333e..e3c13060c4c7 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -183,7 +183,10 @@ static inline bool i915_active_has_exclusive(struct i915_active *ref)
 
 int i915_active_wait(struct i915_active *ref);
 
-int i915_request_await_active(struct i915_request *rq, struct i915_active *ref);
+int i915_request_await_active(struct i915_request *rq,
+			      struct i915_active *ref,
+			      unsigned int flags);
+#define I915_ACTIVE_AWAIT_ALL BIT(0)
 
 int i915_active_acquire(struct i915_active *ref);
 bool i915_active_acquire_if_busy(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 298ca4316e65..ce23c452e6ac 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1174,7 +1174,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
 	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 
 	/* Wait for the vma to be bound before we start! */
-	err = i915_request_await_active(rq, &vma->active);
+	err = i915_request_await_active(rq, &vma->active, 0);
 	if (err)
 		return err;
 
-- 
2.25.1


* [Intel-gfx] [PATCH 10/22] drm/i915/perf: Schedule oa_config after modifying the contexts
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (7 preceding siblings ...)
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 09/22] drm/i915: Extend i915_request_await_active to use all timelines Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 11/22] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
                   ` (15 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

We want the scheduler to emit the context modification commands prior
to enabling the oa_config, for which we must explicitly inform it of the
ordering constraints. This is especially important as we now wait for
the final oa_config setup to be completed, and as this wait may be on a
distinct context to the state modifications, we need that command packet
to always be last in the queue.

We borrow the i915_active for its ability to track multiple timelines
and the last dma_fence on each; a flexible dma_resv. Keeping track of
each dma_fence is important for us so that we can efficiently schedule
the requests and reprioritise as required.
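
For illustration, the resulting ordering scheme boils down to the sketch
below (ctx_rq and config_rq are placeholder names for a per-context
modification request and the final oa_config request; error handling is
elided):

	struct i915_active *active;

	active = i915_active_create();
	if (!active)
		return -ENOMEM;

	/* every per-context modification request is tracked on @active */
	i915_active_add_request(active, ctx_rq);

	/* the oa_config request is ordered after all of them, then tracked too */
	i915_request_await_active(config_rq, active, I915_ACTIVE_AWAIT_ALL);
	i915_active_add_request(active, config_rq);

	/* finally wait for every tracked timeline before reporting success */
	i915_active_wait(active, TASK_UNINTERRUPTIBLE);
	i915_active_put(active);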

Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/display/intel_overlay.c  |   8 +-
 drivers/gpu/drm/i915/gt/intel_context_param.c |   2 +-
 drivers/gpu/drm/i915/i915_active.c            |   6 +-
 drivers/gpu/drm/i915/i915_active.h            |   2 +-
 drivers/gpu/drm/i915/i915_perf.c              | 154 +++++++++++-------
 drivers/gpu/drm/i915/i915_perf_types.h        |   5 +-
 drivers/gpu/drm/i915/i915_vma.h               |   2 +-
 drivers/gpu/drm/i915/selftests/i915_active.c  |   4 +-
 8 files changed, 115 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 3b0cb3534e2a..733dcfc28a05 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -272,7 +272,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 
 	i915_request_add(rq);
 
-	return i915_active_wait(&overlay->last_flip);
+	return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE);
 }
 
 static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
@@ -429,14 +429,14 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	intel_overlay_flip_prepare(overlay, NULL);
 	i915_request_add(rq);
 
-	return i915_active_wait(&overlay->last_flip);
+	return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE);
 }
 
 /* recover from an interruption due to a signal
  * We have to be careful not to repeat work forever an make forward progess. */
 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	return i915_active_wait(&overlay->last_flip);
+	return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -477,7 +477,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 
 	i915_request_add(rq);
 
-	return i915_active_wait(&overlay->last_flip);
+	return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE);
 }
 
 void intel_overlay_reset(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
index 65dcd090245d..903cce8c23c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_param.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -15,7 +15,7 @@ int intel_context_set_ring_size(struct intel_context *ce, long sz)
 	if (intel_context_lock_pinned(ce))
 		return -EINTR;
 
-	err = i915_active_wait(&ce->active);
+	err = i915_active_wait(&ce->active, TASK_INTERRUPTIBLE);
 	if (err < 0)
 		goto unlock;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index e659688db043..12943301df3d 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -496,7 +496,7 @@ static int flush_lazy_signals(struct i915_active *ref)
 	return err;
 }
 
-int i915_active_wait(struct i915_active *ref)
+int i915_active_wait(struct i915_active *ref, int state)
 {
 	int err;
 
@@ -511,7 +511,9 @@ int i915_active_wait(struct i915_active *ref)
 	if (err)
 		return err;
 
-	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
+	if (!i915_active_is_idle(ref) &&
+	    ___wait_var_event(ref, i915_active_is_idle(ref),
+			      state, 0, 0, schedule()))
 		return -EINTR;
 
 	flush_work(&ref->work);
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index e3c13060c4c7..69b5f7a76488 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -181,7 +181,7 @@ static inline bool i915_active_has_exclusive(struct i915_active *ref)
 	return rcu_access_pointer(ref->excl.fence);
 }
 
-int i915_active_wait(struct i915_active *ref);
+int i915_active_wait(struct i915_active *ref, int state);
 
 int i915_request_await_active(struct i915_request *rq,
 			      struct i915_active *ref,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1b074bb4a7fe..214e72670738 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1972,10 +1972,11 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 	return i915_vma_get(oa_bo->vma);
 }
 
-static struct i915_request *
+static int
 emit_oa_config(struct i915_perf_stream *stream,
 	       struct i915_oa_config *oa_config,
-	       struct intel_context *ce)
+	       struct intel_context *ce,
+	       struct i915_active *active)
 {
 	struct i915_request *rq;
 	struct i915_vma *vma;
@@ -1983,7 +1984,7 @@ emit_oa_config(struct i915_perf_stream *stream,
 
 	vma = get_oa_vma(stream, oa_config);
 	if (IS_ERR(vma))
-		return ERR_CAST(vma);
+		return PTR_ERR(vma);
 
 	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
@@ -1997,6 +1998,18 @@ emit_oa_config(struct i915_perf_stream *stream,
 		goto err_vma_unpin;
 	}
 
+	if (!IS_ERR_OR_NULL(active)) {
+		/* After all individual context modifications */
+		err = i915_request_await_active(rq, active,
+						I915_ACTIVE_AWAIT_ALL);
+		if (err)
+			goto err_add_request;
+
+		err = i915_active_add_request(active, rq);
+		if (err)
+			goto err_add_request;
+	}
+
 	i915_vma_lock(vma);
 	err = i915_request_await_object(rq, vma->obj, 0);
 	if (!err)
@@ -2011,14 +2024,13 @@ emit_oa_config(struct i915_perf_stream *stream,
 	if (err)
 		goto err_add_request;
 
-	i915_request_get(rq);
 err_add_request:
 	i915_request_add(rq);
 err_vma_unpin:
 	i915_vma_unpin(vma);
 err_vma_put:
 	i915_vma_put(vma);
-	return err ? ERR_PTR(err) : rq;
+	return err;
 }
 
 static struct intel_context *oa_context(struct i915_perf_stream *stream)
@@ -2026,8 +2038,9 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream)
 	return stream->pinned_ctx ?: stream->engine->kernel_context;
 }
 
-static struct i915_request *
-hsw_enable_metric_set(struct i915_perf_stream *stream)
+static int
+hsw_enable_metric_set(struct i915_perf_stream *stream,
+		      struct i915_active *active)
 {
 	struct intel_uncore *uncore = stream->uncore;
 
@@ -2046,7 +2059,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)
 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	return emit_oa_config(stream, stream->oa_config, oa_context(stream));
+	return emit_oa_config(stream,
+			      stream->oa_config, oa_context(stream),
+			      active);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2196,8 +2211,10 @@ static int gen8_modify_context(struct intel_context *ce,
 	return err;
 }
 
-static int gen8_modify_self(struct intel_context *ce,
-			    const struct flex *flex, unsigned int count)
+static int
+gen8_modify_self(struct intel_context *ce,
+		 const struct flex *flex, unsigned int count,
+		 struct i915_active *active)
 {
 	struct i915_request *rq;
 	int err;
@@ -2208,8 +2225,17 @@ static int gen8_modify_self(struct intel_context *ce,
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
+	if (!IS_ERR_OR_NULL(active)) {
+		err = i915_active_add_request(active, rq);
+		if (err)
+			goto err_add_request;
+	}
+
 	err = gen8_load_flex(rq, ce, flex, count);
+	if (err)
+		goto err_add_request;
 
+err_add_request:
 	i915_request_add(rq);
 	return err;
 }
@@ -2243,7 +2269,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
-static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream,
+				       struct i915_active *active)
 {
 	int err;
 	struct intel_context *ce = stream->pinned_ctx;
@@ -2252,7 +2279,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
 		{
 			GEN8_OACTXCONTROL,
 			stream->perf->ctx_oactxctrl_offset + 1,
-			enable ? GEN8_OA_COUNTER_RESUME : 0,
+			active ? GEN8_OA_COUNTER_RESUME : 0,
 		},
 	};
 	/* Offsets in regs_lri are not used since this configuration is only
@@ -2264,13 +2291,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
 			GEN12_OAR_OACONTROL,
 			GEN12_OAR_OACONTROL_OFFSET + 1,
 			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
-			(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+			(active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
 		},
 		{
 			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
 			CTX_CONTEXT_CONTROL,
 			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
-				      enable ?
+				      active ?
 				      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
 				      0)
 		},
@@ -2287,7 +2314,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
 		return err;
 
 	/* Apply regs_lri using LRI with pinned context */
-	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
+	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
 }
 
 /*
@@ -2315,9 +2342,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
  * Note: it's only the RCS/Render context that has any OA state.
  * Note: the first flex register passed must always be R_PWR_CLK_STATE
  */
-static int oa_configure_all_contexts(struct i915_perf_stream *stream,
-				     struct flex *regs,
-				     size_t num_regs)
+static int
+oa_configure_all_contexts(struct i915_perf_stream *stream,
+			  struct flex *regs,
+			  size_t num_regs,
+			  struct i915_active *active)
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
 	struct intel_engine_cs *engine;
@@ -2374,7 +2403,7 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,
 
 		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 
-		err = gen8_modify_self(ce, regs, num_regs);
+		err = gen8_modify_self(ce, regs, num_regs, active);
 		if (err)
 			return err;
 	}
@@ -2382,8 +2411,10 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,
 	return 0;
 }
 
-static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
-					const struct i915_oa_config *oa_config)
+static int
+gen12_configure_all_contexts(struct i915_perf_stream *stream,
+			     const struct i915_oa_config *oa_config,
+			     struct i915_active *active)
 {
 	struct flex regs[] = {
 		{
@@ -2392,11 +2423,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
 		},
 	};
 
-	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+	return oa_configure_all_contexts(stream,
+					 regs, ARRAY_SIZE(regs),
+					 active);
 }
 
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
-				      const struct i915_oa_config *oa_config)
+static int
+lrc_configure_all_contexts(struct i915_perf_stream *stream,
+			   const struct i915_oa_config *oa_config,
+			   struct i915_active *active)
 {
 	/* The MMIO offsets for Flex EU registers aren't contiguous */
 	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
@@ -2429,11 +2464,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
 	for (i = 2; i < ARRAY_SIZE(regs); i++)
 		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
 
-	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+	return oa_configure_all_contexts(stream,
+					 regs, ARRAY_SIZE(regs),
+					 active);
 }
 
-static struct i915_request *
-gen8_enable_metric_set(struct i915_perf_stream *stream)
+static int
+gen8_enable_metric_set(struct i915_perf_stream *stream,
+		       struct i915_active *active)
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
@@ -2473,11 +2511,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = lrc_configure_all_contexts(stream, oa_config);
+	ret = lrc_configure_all_contexts(stream, oa_config, active);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
-	return emit_oa_config(stream, oa_config, oa_context(stream));
+	return emit_oa_config(stream,
+			      stream->oa_config, oa_context(stream),
+			      active);
 }
 
 static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
@@ -2487,8 +2527,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
 			     0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
 }
 
-static struct i915_request *
-gen12_enable_metric_set(struct i915_perf_stream *stream)
+static int
+gen12_enable_metric_set(struct i915_perf_stream *stream,
+			struct i915_active *active)
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
@@ -2517,9 +2558,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = gen12_configure_all_contexts(stream, oa_config);
+	ret = gen12_configure_all_contexts(stream, oa_config, active);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
 	/*
 	 * For Gen12, performance counters are context
@@ -2527,12 +2568,14 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * requested this.
 	 */
 	if (stream->ctx) {
-		ret = gen12_configure_oar_context(stream, true);
+		ret = gen12_configure_oar_context(stream, active);
 		if (ret)
-			return ERR_PTR(ret);
+			return ret;
 	}
 
-	return emit_oa_config(stream, oa_config, oa_context(stream));
+	return emit_oa_config(stream,
+			      stream->oa_config, oa_context(stream),
+			      active);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -2540,7 +2583,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream)
 	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	lrc_configure_all_contexts(stream, NULL);
+	lrc_configure_all_contexts(stream, NULL, NULL);
 
 	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
 }
@@ -2550,7 +2593,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream)
 	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	lrc_configure_all_contexts(stream, NULL);
+	lrc_configure_all_contexts(stream, NULL, NULL);
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2561,11 +2604,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 	struct intel_uncore *uncore = stream->uncore;
 
 	/* Reset all contexts' slices/subslices configurations. */
-	gen12_configure_all_contexts(stream, NULL);
+	gen12_configure_all_contexts(stream, NULL, NULL);
 
 	/* disable the context save/restore or OAR counters */
 	if (stream->ctx)
-		gen12_configure_oar_context(stream, false);
+		gen12_configure_oar_context(stream, NULL);
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2729,16 +2772,19 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
 
 static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
 {
-	struct i915_request *rq;
+	struct i915_active *active;
+	int err;
 
-	rq = stream->perf->ops.enable_metric_set(stream);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
+	active = i915_active_create();
+	if (!active)
+		return -ENOMEM;
 
-	i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
-	i915_request_put(rq);
+	err = stream->perf->ops.enable_metric_set(stream, active);
+	if (err == 0)
+		i915_active_wait(active, TASK_UNINTERRUPTIBLE);
 
-	return 0;
+	i915_active_put(active);
+	return err;
 }
 
 /**
@@ -3192,7 +3238,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		return -EINVAL;
 
 	if (config != stream->oa_config) {
-		struct i915_request *rq;
+		int err;
 
 		/*
 		 * If OA is bound to a specific context, emit the
@@ -3203,13 +3249,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		 * When set globally, we use a low priority kernel context,
 		 * so it will effectively take effect when idle.
 		 */
-		rq = emit_oa_config(stream, config, oa_context(stream));
-		if (!IS_ERR(rq)) {
+		err = emit_oa_config(stream, config, oa_context(stream), NULL);
+		if (!err)
 			config = xchg(&stream->oa_config, config);
-			i915_request_put(rq);
-		} else {
-			ret = PTR_ERR(rq);
-		}
+		else
+			ret = err;
 	}
 
 	i915_oa_config_put(config);
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index a0e22f00f6cf..5eaf874a0d25 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -21,6 +21,7 @@
 
 struct drm_i915_private;
 struct file;
+struct i915_active;
 struct i915_gem_context;
 struct i915_perf;
 struct i915_vma;
@@ -339,8 +340,8 @@ struct i915_oa_ops {
 	 * counter reports being sampled. May apply system constraints such as
 	 * disabling EU clock gating as required.
 	 */
-	struct i915_request *
-		(*enable_metric_set)(struct i915_perf_stream *stream);
+	int (*enable_metric_set)(struct i915_perf_stream *stream,
+				 struct i915_active *active);
 
 	/**
 	 * @disable_metric_set: Remove system constraints associated with using
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index e1ced1df13e1..3baa98fa5009 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -380,7 +380,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma);
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
 	/* Wait for the asynchronous bindings and pending GPU reads */
-	return i915_active_wait(&vma->active);
+	return i915_active_wait(&vma->active, TASK_INTERRUPTIBLE);
 }
 
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 3a37c67ab6c4..25909e590d8c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -153,7 +153,7 @@ static int live_active_wait(void *arg)
 	if (IS_ERR(active))
 		return PTR_ERR(active);
 
-	i915_active_wait(&active->base);
+	i915_active_wait(&active->base, TASK_UNINTERRUPTIBLE);
 	if (!READ_ONCE(active->retired)) {
 		struct drm_printer p = drm_err_printer(__func__);
 
@@ -230,7 +230,7 @@ static int live_active_barrier(void *arg)
 	i915_active_release(&active->base);
 
 	if (err == 0)
-		err = i915_active_wait(&active->base);
+		err = i915_active_wait(&active->base, TASK_UNINTERRUPTIBLE);
 
 	if (err == 0 && !READ_ONCE(active->retired)) {
 		pr_err("i915_active not retired after flushing barriers!\n");
-- 
2.25.1


* [Intel-gfx] [PATCH 11/22] drm/i915/selftests: Add request throughput measurement to perf
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (8 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 10/22] drm/i915/perf: Schedule oa_config after modifying the contexts Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 12/22] drm/i915/execlists: Check the sentinel is alone in the ELSP Chris Wilson
                   ` (14 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

Under ideal circumstances, the driver should be able to keep the GPU
fully saturated with work. Measure how close to ideal we get under the
harshest of conditions with no user payload.
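
For reference, the engine utilisation printed by the new test is computed
roughly as below (matching the selftest body; div64_u64() keeps the
arithmetic safe on 32-bit builds):

	u64 busy = 100 * ktime_to_ns(p->busy);	/* engine busy time, pre-scaled to % */
	u64 dt = ktime_to_ns(p->time);		/* wall time of the measurement loop */
	int integer, decimal;

	if (dt) {
		integer = div64_u64(busy, dt);		/* whole percent */
		busy -= integer * dt;
		decimal = div64_u64(100 * busy, dt);	/* two decimal places */
	} else {
		integer = decimal = 0;
	}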

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/i915_request.c | 285 +++++++++++++++++-
 2 files changed, 285 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index 3bf7f53e9924..d8da142985eb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -16,5 +16,6 @@
  * Tests are executed in order by igt/i915_selftest
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
+selftest(request, i915_request_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index f89d9c42f1fa..d4c088cfe4e1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prime_numbers.h>
+#include <linux/pm_qos.h>
 
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
@@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg)
 		struct igt_live_test t;
 		unsigned int idx;
 
-		snprintf(name, sizeof(name), "%pS", fn);
+		snprintf(name, sizeof(name), "%ps", *fn);
 		err = igt_live_test_begin(&t, i915, __func__, name);
 		if (err)
 			break;
@@ -1470,3 +1471,285 @@ int i915_request_live_selftests(struct drm_i915_private *i915)
 
 	return i915_subtests(tests, i915);
 }
+
+struct perf_parallel {
+	struct intel_engine_cs *engine;
+	unsigned long count;
+	ktime_t time;
+	ktime_t busy;
+	u64 runtime;
+};
+
+static int switch_to_kernel_sync(struct intel_context *ce, int err)
+{
+	struct i915_request *rq;
+	struct dma_fence *fence;
+
+	rq = intel_engine_create_kernel_request(ce->engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	fence = i915_active_fence_get(&ce->timeline->last_request);
+	if (fence) {
+		i915_request_await_dma_fence(rq, fence);
+		dma_fence_put(fence);
+	}
+
+	rq = i915_request_get(rq);
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
+		err = -ETIME;
+	i915_request_put(rq);
+
+	while (!err && !intel_engine_is_idle(ce->engine))
+		intel_engine_flush_submission(ce->engine);
+
+	return err;
+}
+
+static int perf_sync(void *arg)
+{
+	struct perf_parallel *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	bool busy;
+	int err = 0;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	p->time = ktime_get();
+	count = 0;
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		err = 0;
+		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+			err = -ETIME;
+		i915_request_put(rq);
+		if (err)
+			break;
+
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_many(void *arg)
+{
+	struct perf_parallel *p = arg;
+	struct intel_engine_cs *engine = p->engine;
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);
+	unsigned long count;
+	int err = 0;
+	bool busy;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	busy = false;
+	if (intel_engine_supports_stats(engine) &&
+	    !intel_enable_engine_stats(engine)) {
+		p->busy = intel_engine_get_busy_time(engine);
+		busy = true;
+	}
+
+	count = 0;
+	p->time = ktime_get();
+	do {
+		struct i915_request *rq;
+
+		rq = i915_request_create(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		i915_request_add(rq);
+		count++;
+	} while (!__igt_timeout(end_time, NULL));
+	p->time = ktime_sub(ktime_get(), p->time);
+
+	if (busy) {
+		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
+				    p->busy);
+		intel_disable_engine_stats(engine);
+	}
+
+	err = switch_to_kernel_sync(ce, err);
+	p->runtime = intel_context_get_total_runtime_ns(ce);
+	p->count = count;
+
+	intel_context_unpin(ce);
+	intel_context_put(ce);
+	return err;
+}
+
+static int perf_parallel_engines(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {
+		perf_sync,
+		perf_many,
+		NULL,
+	};
+	const unsigned int nengines = num_uabi_engines(i915);
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);
+	struct pm_qos_request *qos;
+	struct {
+		struct perf_parallel p;
+		struct task_struct *tsk;
+	} *engines;
+	int err = 0;
+
+	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	qos = kzalloc(sizeof(*qos), GFP_KERNEL);
+	if (qos)
+		pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
+	for (fn = func; *fn; fn++) {
+		char name[KSYM_NAME_LEN];
+		struct igt_live_test t;
+		unsigned int idx;
+
+		snprintf(name, sizeof(name), "%ps", *fn);
+		err = igt_live_test_begin(&t, i915, __func__, name);
+		if (err)
+			break;
+
+		atomic_set(&i915->selftest.counter, nengines);
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			intel_engine_pm_get(engine);
+
+			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
+			engines[idx].p.engine = engine;
+
+			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
+						       "igt:%s", engine->name);
+			if (IS_ERR(engines[idx].tsk)) {
+				err = PTR_ERR(engines[idx].tsk);
+				intel_engine_pm_put(engine);
+				break;
+			}
+			get_task_struct(engines[idx++].tsk);
+		}
+
+		yield(); /* start all threads before we kthread_stop() */
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			int status;
+
+			if (IS_ERR(engines[idx].tsk))
+				break;
+
+			status = kthread_stop(engines[idx].tsk);
+			if (status && !err)
+				err = status;
+
+			intel_engine_pm_put(engine);
+			put_task_struct(engines[idx++].tsk);
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+
+		idx = 0;
+		for_each_uabi_engine(engine, i915) {
+			struct perf_parallel *p = &engines[idx].p;
+			u64 busy = 100 * ktime_to_ns(p->busy);
+			u64 dt = ktime_to_ns(p->time);
+			int integer, decimal;
+
+			if (dt) {
+				integer = div64_u64(busy, dt);
+				busy -= integer * dt;
+				decimal = div64_u64(100 * busy, dt);
+			} else {
+				integer = 0;
+				decimal = 0;
+			}
+
+			GEM_BUG_ON(engine != p->engine);
+			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
+				name, engine->name, p->count, integer, decimal,
+				div_u64(p->runtime, 1000 * 1000),
+				div_u64(ktime_to_ns(p->time), 1000 * 1000));
+			idx++;
+		}
+	}
+
+	if (qos) {
+		pm_qos_remove_request(qos);
+		kfree(qos);
+	}
+	kfree(engines);
+	return err;
+}
+
+int i915_request_perf_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(perf_parallel_engines),
+	};
+
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
-- 
2.25.1


* [Intel-gfx] [PATCH 12/22] drm/i915/execlists: Check the sentinel is alone in the ELSP
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (9 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 11/22] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02 14:04   ` Mika Kuoppala
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 13/22] drm/i915/execlists: Reduce preempt-to-busy roundtrip delay Chris Wilson
                   ` (13 subsequent siblings)
  24 siblings, 1 reply; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

We only use sentinel requests for "preempt-to-idle" passes, so assert
that they are the only request in a new submission.
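
Expressed as a compressed sketch, the invariant being asserted is that a
sentinel may only occupy the first ELSP slot and nothing may follow it
(pending[] and the helpers are those used in the hunk below):

	struct i915_request * const *port, *rq;
	bool sentinel = false;

	for (port = execlists->pending; (rq = *port); port++) {
		if (sentinel)
			return false;	/* a request queued after a sentinel */

		sentinel = i915_request_has_sentinel(rq);
		if (sentinel && port != execlists->pending)
			return false;	/* sentinel not in the first slot */
	}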

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 15d56cd3937e..b9b3f78f1324 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1448,6 +1448,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 {
 	struct i915_request * const *port, *rq;
 	struct intel_context *ce = NULL;
+	bool sentinel = false;
 
 	trace_ports(execlists, msg, execlists->pending);
 
@@ -1481,6 +1482,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 		}
 		ce = rq->context;
 
+		/*
+		 * Sentinels are supposed to be lonely so they flush the
+		 * current execution off the HW. Check that they are the
+		 * only request in the pending submission.
+		 */
+		if (sentinel) {
+			GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			return false;
+		}
+
+		sentinel = i915_request_has_sentinel(rq);
+		if (sentinel && port != execlists->pending) {
+			GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
+				      ce->timeline->fence_context,
+				      port - execlists->pending);
+			return false;
+		}
+
 		/* Hold tightly onto the lock to prevent concurrent retires! */
 		if (!spin_trylock_irqsave(&rq->lock, flags))
 			continue;
-- 
2.25.1


* [Intel-gfx] [PATCH 13/22] drm/i915/execlists: Reduce preempt-to-busy roundtrip delay
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (10 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 12/22] drm/i915/execlists: Check the sentinel is alone in the ELSP Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 14/22] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
                   ` (12 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

To prevent the context from proceeding past the end of the request as we
unwind, we embed a semaphore into the footer of each request. (If the
context were to skip past the end of the request as we perform the
preemption, the next time we reload the context its RING_HEAD would be past
the RING_TAIL and, instead of replaying the commands, it would read the
rest of the uninitialised ringbuffer.)

However, this requires us to keep the ring paused at the end of the
request until we have a chance to process the preemption ack and remove
the semaphore. Our processing of acks is at the whim of ksoftirqd, and
so it is entirely possible that the GPU has to wait for the tasklet
before it can proceed with the next request.

It was suggested that we could also embed a MI_LOAD_REGISTER_MEM into
the footer to read the current RING_TAIL from the context, which would
allow us to not only avoid this round trip (and so release the context
as soon as we had submitted the preemption request to in ELSP), but also
skip using ELSP for lite-restores entirely. That has the nice benefit of
dramatically reducing contention and the frequency of interrupts when a
client submits two or more execbufs in rapid succession.

* This did not work out quite as well as anticipated due to us reloading
the new RING_TAIL from the context image moments before the HW acted
upon the ELSP, with the calamitous effect that we would submit a
preemption request with a RING_TAIL identical to the current RING_HEAD,
causing us to fail WaIdleLiteRestore and the HW to stop working.

However, mmio access to RING_TAIL was defeatured in gen11 so we can only
employ this handy trick for gen8/gen9.
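
Concretely, the gen8/gen9 footer gains an MI_LOAD_REGISTER_MEM that
reloads RING_TAIL from the saved context image, so the HW can pick up a
newer tail without a fresh ELSP write (sketch of the emit_lrm_tail()
helper added below):

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(request->engine->mmio_base));
	*cs++ = i915_ggtt_offset(request->context->state) +
		LRC_STATE_PN * PAGE_SIZE +
		CTX_RING_TAIL * sizeof(u32);	/* saved RING_TAIL in the context image */
	*cs++ = 0;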

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h | 23 +++--
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 93 +++++++++++++++++++-
 2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 80cdde712842..4dfaaaa32e32 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -479,14 +479,15 @@ struct intel_engine_cs {
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
-#define I915_ENGINE_USING_CMD_PARSER BIT(0)
-#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
-#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
-#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL       BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER		BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER		BIT(1)
+#define I915_ENGINE_SUPPORTS_STATS		BIT(2)
+#define I915_ENGINE_HAS_PREEMPTION		BIT(3)
+#define I915_ENGINE_HAS_SEMAPHORES		BIT(4)
+#define I915_ENGINE_HAS_TAIL_LRM		BIT(5)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET	BIT(6)
+#define I915_ENGINE_IS_VIRTUAL			BIT(7)
+#define I915_ENGINE_HAS_RELATIVE_MMIO		BIT(8)
 	unsigned int flags;
 
 	/*
@@ -584,6 +585,12 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
 }
 
+static inline bool
+intel_engine_has_tail_lrm(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_TAIL_LRM;
+}
+
 static inline bool
 intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b9b3f78f1324..c70dc4cfc4f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1836,6 +1836,76 @@ static inline void clear_ports(struct i915_request **ports, int count)
 	memset_p((void **)ports, NULL, count);
 }
 
+static struct i915_request *
+skip_lite_restore(struct intel_engine_cs *const engine,
+		  struct i915_request *first,
+		  bool *submit)
+{
+	struct intel_engine_execlists *const execlists = &engine->execlists;
+	struct i915_request *last = first;
+	struct rb_node *rb;
+
+	if (!intel_engine_has_tail_lrm(engine))
+		return last;
+
+	GEM_BUG_ON(*submit);
+	while ((rb = rb_first_cached(&execlists->queue))) {
+		struct i915_priolist *p = to_priolist(rb);
+		struct i915_request *rq, *rn;
+		int i;
+
+		priolist_for_each_request_consume(rq, rn, p, i) {
+			if (!can_merge_rq(last, rq))
+				goto out;
+
+			if (__i915_request_submit(rq)) {
+				*submit = true;
+				last = rq;
+			}
+		}
+
+		rb_erase_cached(&p->node, &execlists->queue);
+		i915_priolist_free(p);
+	}
+out:
+	if (*submit) {
+		ring_set_paused(engine, 1);
+
+		/*
+		 * If we are quick and the current context hasn't yet completed
+		 * its request, we can just tell it to extend the RING_TAIL
+		 * onto the next without having to submit a new ELSP.
+		 */
+		if (!i915_request_completed(first)) {
+			struct i915_request **port;
+
+			ENGINE_TRACE(engine,
+				     "eliding lite-restore last=%llx:%lld->%lld, current %d\n",
+				     first->fence.context,
+				     first->fence.seqno,
+				     last->fence.seqno,
+				     hwsp_seqno(last));
+			GEM_BUG_ON(first->context != last->context);
+
+			for (port = (struct i915_request **)execlists->active;
+			     *port != first;
+			     port++)
+				;
+
+			GEM_BUG_ON(first == last);
+			WRITE_ONCE(*port, i915_request_get(last));
+			execlists_update_context(last);
+
+			i915_request_put(first);
+			*submit = false;
+		}
+
+		ring_set_paused(engine, 0);
+	}
+
+	return last;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1972,6 +2042,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 				return;
 			}
+
+			last = skip_lite_restore(engine, last, &submit);
 		}
 	}
 
@@ -4198,15 +4270,28 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
+static u32 *emit_lrm_tail(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_TAIL(request->engine->mmio_base));
+	*cs++ = i915_ggtt_offset(request->context->state) +
+		LRC_STATE_PN * PAGE_SIZE +
+		CTX_RING_TAIL * sizeof(u32);
+	*cs++ = 0;
+
+	return cs;
+}
+
 static __always_inline u32*
-gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
-				 u32 *cs)
+gen8_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
 {
 	*cs++ = MI_USER_INTERRUPT;
 
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	if (intel_engine_has_semaphores(request->engine))
 		cs = emit_preempt_busywait(request, cs);
+	if (intel_engine_has_tail_lrm(request->engine))
+		cs = emit_lrm_tail(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -4295,6 +4380,8 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
 static __always_inline u32*
 gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
 {
+	GEM_BUG_ON(intel_engine_has_tail_lrm(request->engine));
+
 	*cs++ = MI_USER_INTERRUPT;
 
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -4361,6 +4448,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
 		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+		if (INTEL_GEN(engine->i915) < 11)
+			engine->flags |= I915_ENGINE_HAS_TAIL_LRM;
 	}
 
 	if (INTEL_GEN(engine->i915) >= 12)
-- 
2.25.1


* [Intel-gfx] [PATCH 14/22] dma-buf: Prettify typecasts for dma-fence-chain
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (11 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 13/22] drm/i915/execlists: Reduce preempt-to-busy roundtrip delay Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02 16:11   ` Mika Kuoppala
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 15/22] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
                   ` (11 subsequent siblings)
  24 siblings, 1 reply; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

Inside dma-fence-chain, we use a cmpxchg on an RCU-protected pointer. To
avoid the sparse warning for using the RCU pointer directly, we have to
cast away the __rcu annotation. However, we don't need to use void*
everywhere and can stick to the dma_fence*.
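
Side by side, the cast changes from discarding the type entirely to only
discarding the __rcu annotation (illustrative; chain->prev is the
__rcu-annotated field):

	/* before: everything funnelled through void*, losing type checking */
	tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);

	/* after: __force drops only the __rcu annotation, the type remains */
	tmp = cmpxchg((struct dma_fence __force **)&chain->prev,
		      prev, replacement);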

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence-chain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 44a741677d25..3d123502ff12 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -62,7 +62,8 @@ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
 			replacement = NULL;
 		}
 
-		tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);
+		tmp = cmpxchg((struct dma_fence __force **)&chain->prev,
+			      prev, replacement);
 		if (tmp == prev)
 			dma_fence_put(tmp);
 		else
-- 
2.25.1


* [Intel-gfx] [PATCH 15/22] dma-buf: Report signaled links inside dma-fence-chain
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (12 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 14/22] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 16/22] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
                   ` (10 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

Whenever we walk along the dma-fence-chain, we prune signaled links to
keep the chain nice and tidy. This leads to situations where we can
prune a link and report the earlier fence as the target seqno --
violating our own consistency checks that the seqno is not more advanced
than the last element in a dma-fence-chain.

Report a NULL fence and success if the seqno has already been signaled.
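
For a caller this means a NULL fence with a zero return is now a valid
outcome, e.g. (illustrative use of dma_fence_chain_find_seqno(); the
trailing wait is just an example):

	struct dma_fence *fence = dma_fence_get(&chain->base);
	int err;

	err = dma_fence_chain_find_seqno(&fence, seqno);
	if (err)
		return err;

	if (!fence)	/* @seqno already signaled, nothing left to wait on */
		return 0;

	err = dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return err;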

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/dma-fence-chain.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 3d123502ff12..c435bbba851c 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
 		return -EINVAL;
 
 	dma_fence_chain_for_each(*pfence, &chain->base) {
+		if ((*pfence)->seqno < seqno) { /* already signaled */
+			dma_fence_put(*pfence);
+			*pfence = NULL;
+			break;
+		}
+
 		if ((*pfence)->context != chain->base.context ||
 		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
 			break;
@@ -222,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops);
  * @chain: the chain node to initialize
  * @prev: the previous fence
  * @fence: the current fence
+ * @seqno: the sequence number (syncpt) of the fence within the chain
  *
  * Initialize a new chain node and either start a new chain or add the node to
  * the existing chain of the previous fence.
-- 
2.25.1


* [Intel-gfx] [PATCH 16/22] dma-buf: Exercise dma-fence-chain under selftests
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (13 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 15/22] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 17/22] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
                   ` (9 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

A few very simple testcases to exercise the dma-fence-chain API.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/dma-buf/Makefile             |   3 +-
 drivers/dma-buf/selftests.h          |   1 +
 drivers/dma-buf/st-dma-fence-chain.c | 713 +++++++++++++++++++++++++++
 3 files changed, 716 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma-buf/st-dma-fence-chain.c

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 9c190026bfab..995e05f609ff 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_UDMABUF)		+= udmabuf.o
 
 dmabuf_selftests-y := \
 	selftest.o \
-	st-dma-fence.o
+	st-dma-fence.o \
+	st-dma-fence-chain.o
 
 obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
index 5320386f02e5..55918ef9adab 100644
--- a/drivers/dma-buf/selftests.h
+++ b/drivers/dma-buf/selftests.h
@@ -11,3 +11,4 @@
  */
 selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
 selftest(dma_fence, dma_fence)
+selftest(dma_fence_chain, dma_fence_chain)
diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
new file mode 100644
index 000000000000..bd08ba67b03b
--- /dev/null
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+
+#include "selftest.h"
+
+static struct kmem_cache *slab_fences;
+
+static inline struct mock_fence {
+	struct dma_fence base;
+	spinlock_t lock;
+} *to_mock_fence(struct dma_fence *f) {
+	return container_of(f, struct mock_fence, base);
+}
+
+static const char *mock_name(struct dma_fence *f)
+{
+	return "mock";
+}
+
+static void mock_fence_release(struct dma_fence *f)
+{
+	kmem_cache_free(slab_fences, to_mock_fence(f));
+}
+
+static const struct dma_fence_ops mock_ops = {
+	.get_driver_name = mock_name,
+	.get_timeline_name = mock_name,
+	.release = mock_fence_release,
+};
+
+static struct dma_fence *mock_fence(void)
+{
+	struct mock_fence *f;
+
+	f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	spin_lock_init(&f->lock);
+	dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+
+	return &f->base;
+}
+
+static inline struct mock_chain {
+	struct dma_fence_chain base;
+} *to_mock_chain(struct dma_fence *f) {
+	return container_of(f, struct mock_chain, base.base);
+}
+
+static struct dma_fence *mock_chain(struct dma_fence *prev,
+				    struct dma_fence *fence,
+				    u64 seqno)
+{
+	struct mock_chain *f;
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	dma_fence_chain_init(&f->base,
+			     dma_fence_get(prev),
+			     dma_fence_get(fence),
+			     seqno);
+
+	return &f->base.base;
+}
+
+static int sanitycheck(void *arg)
+{
+	struct dma_fence *f, *chain;
+	int err = 0;
+
+	f = mock_fence();
+	if (!f)
+		return -ENOMEM;
+
+	chain = mock_chain(NULL, f, 1);
+	if (!chain)
+		err = -ENOMEM;
+
+	dma_fence_signal(f);
+	dma_fence_put(f);
+
+	dma_fence_put(chain);
+
+	return err;
+}
+
+struct fence_chains {
+	unsigned int chain_length;
+	struct dma_fence **fences;
+	struct dma_fence **chains;
+
+	struct dma_fence *tail;
+};
+
+static uint64_t seqno_inc(unsigned int i)
+{
+	return i + 1;
+}
+
+static int fence_chains_init(struct fence_chains *fc, unsigned int count,
+			     uint64_t (*seqno_fn)(unsigned int))
+{
+	unsigned int i;
+	int err = 0;
+
+	fc->chains = kvmalloc_array(count, sizeof(*fc->chains),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!fc->chains)
+		return -ENOMEM;
+
+	fc->fences = kvmalloc_array(count, sizeof(*fc->fences),
+				    GFP_KERNEL | __GFP_ZERO);
+	if (!fc->fences) {
+		err = -ENOMEM;
+		goto err_chains;
+	}
+
+	fc->tail = NULL;
+	for (i = 0; i < count; i++) {
+		fc->fences[i] = mock_fence();
+		if (!fc->fences[i]) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+
+		fc->chains[i] = mock_chain(fc->tail,
+					   fc->fences[i],
+					   seqno_fn(i));
+		if (!fc->chains[i]) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+
+		fc->tail = fc->chains[i];
+	}
+
+	fc->chain_length = i;
+	return 0;
+
+unwind:
+	for (i = 0; i < count; i++) {
+		dma_fence_put(fc->fences[i]);
+		dma_fence_put(fc->chains[i]);
+	}
+	kvfree(fc->fences);
+err_chains:
+	kvfree(fc->chains);
+	return err;
+}
+
+static void fence_chains_fini(struct fence_chains *fc)
+{
+	unsigned int i;
+
+	for (i = 0; i < fc->chain_length; i++) {
+		dma_fence_signal(fc->fences[i]);
+		dma_fence_put(fc->fences[i]);
+	}
+	kvfree(fc->fences);
+
+	for (i = 0; i < fc->chain_length; i++)
+		dma_fence_put(fc->chains[i]);
+	kvfree(fc->chains);
+}
+
+static int find_seqno(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc);
+	if (err)
+		return err;
+
+	fence = dma_fence_get(fc.tail);
+	err = dma_fence_chain_find_seqno(&fence, 0);
+	dma_fence_put(fence);
+	if (err) {
+		pr_err("Reported %d for find_seqno(0)!\n", err);
+		goto err;
+	}
+
+	for (i = 0; i < fc.chain_length; i++) {
+		fence = dma_fence_get(fc.tail);
+		err = dma_fence_chain_find_seqno(&fence, i + 1);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Reported %d for find_seqno(%d:%d)!\n",
+			       err, fc.chain_length + 1, i + 1);
+			goto err;
+		}
+		if (fence != fc.chains[i]) {
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
+			       fc.chain_length + 1, i + 1);
+			err = -EINVAL;
+			goto err;
+		}
+
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, i + 1);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Error reported for finding self\n");
+			goto err;
+		}
+		if (fence != fc.chains[i]) {
+			pr_err("Incorrect fence reported by find self\n");
+			err = -EINVAL;
+			goto err;
+		}
+
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, i + 2);
+		dma_fence_put(fence);
+		if (!err) {
+			pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",
+			       i + 1, i + 2);
+			err = -EINVAL;
+			goto err;
+		}
+
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, i);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Error reported for previous fence!\n");
+			goto err;
+		}
+		if (i > 0 && fence != fc.chains[i - 1]) {
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",
+			       i + 1, i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static int find_signaled(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+
+	err = fence_chains_init(&fc, 2, seqno_inc);
+	if (err)
+		return err;
+
+	dma_fence_signal(fc.fences[0]);
+
+	fence = dma_fence_get(fc.tail);
+	err = dma_fence_chain_find_seqno(&fence, 1);
+	dma_fence_put(fence);
+	if (err) {
+		pr_err("Reported %d for find_seqno()!\n", err);
+		goto err;
+	}
+
+	if (fence && fence != fc.chains[0]) {
+		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:1\n",
+		       fence->seqno);
+
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, 1);
+		dma_fence_put(fence);
+		if (err)
+			pr_err("Reported %d for finding self!\n", err);
+
+		err = -EINVAL;
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static int find_out_of_order(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+
+	err = fence_chains_init(&fc, 3, seqno_inc);
+	if (err)
+		return err;
+
+	dma_fence_signal(fc.fences[1]);
+
+	fence = dma_fence_get(fc.tail);
+	err = dma_fence_chain_find_seqno(&fence, 2);
+	dma_fence_put(fence);
+	if (err) {
+		pr_err("Reported %d for find_seqno()!\n", err);
+		goto err;
+	}
+
+	if (fence && fence != fc.chains[1]) {
+		pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:2\n",
+		       fence->seqno);
+
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, 2);
+		dma_fence_put(fence);
+		if (err)
+			pr_err("Reported %d for finding self!\n", err);
+
+		err = -EINVAL;
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static uint64_t seqno_inc2(unsigned int i)
+{
+	return 2 * i + 2;
+}
+
+static int find_gap(void *arg)
+{
+	struct fence_chains fc;
+	struct dma_fence *fence;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc2);
+	if (err)
+		return err;
+
+	for (i = 0; i < fc.chain_length; i++) {
+		fence = dma_fence_get(fc.tail);
+		err = dma_fence_chain_find_seqno(&fence, 2 * i + 1);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Reported %d for find_seqno(%d:%d)!\n",
+			       err, fc.chain_length + 1, 2 * i + 1);
+			goto err;
+		}
+		if (fence != fc.chains[i]) {
+			pr_err("Incorrect fence.seqno:%lld reported by find_seqno(%d:%d)\n",
+			       fence->seqno,
+			       fc.chain_length + 1,
+			       2 * i + 1);
+			err = -EINVAL;
+			goto err;
+		}
+
+		dma_fence_get(fence);
+		err = dma_fence_chain_find_seqno(&fence, 2 * i + 2);
+		dma_fence_put(fence);
+		if (err) {
+			pr_err("Error reported for finding self\n");
+			goto err;
+		}
+		if (fence != fc.chains[i]) {
+			pr_err("Incorrect fence reported by find self\n");
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+struct find_race {
+	struct fence_chains fc;
+	atomic_t children;
+};
+
+static int __find_race(void *arg)
+{
+	struct find_race *data = arg;
+	int err = 0;
+
+	while (!kthread_should_stop()) {
+		struct dma_fence *fence = dma_fence_get(data->fc.tail);
+		int seqno;
+
+		seqno = prandom_u32_max(data->fc.chain_length) + 1;
+
+		err = dma_fence_chain_find_seqno(&fence, seqno);
+		if (err) {
+			pr_err("Failed to find fence seqno:%d\n",
+			       seqno);
+			dma_fence_put(fence);
+			break;
+		}
+		if (!fence)
+			goto signal;
+
+		err = dma_fence_chain_find_seqno(&fence, seqno);
+		if (err) {
+			pr_err("Reported an invalid fence for find-self:%d\n",
+			       seqno);
+			dma_fence_put(fence);
+			break;
+		}
+
+		if (fence->seqno < seqno) {
+			pr_err("Reported an earlier fence.seqno:%lld for seqno:%d\n",
+			       fence->seqno, seqno);
+			err = -EINVAL;
+			dma_fence_put(fence);
+			break;
+		}
+
+		dma_fence_put(fence);
+
+signal:
+		seqno = prandom_u32_max(data->fc.chain_length - 1);
+		dma_fence_signal(data->fc.fences[seqno]);
+		cond_resched();
+	}
+
+	if (atomic_dec_and_test(&data->children))
+		wake_up_var(&data->children);
+	return err;
+}
+
+static int find_race(void *arg)
+{
+	struct find_race data;
+	int ncpus = num_online_cpus();
+	struct task_struct **threads;
+	unsigned long count;
+	int err;
+	int i;
+
+	err = fence_chains_init(&data.fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+	if (!threads) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	atomic_set(&data.children, 0);
+	for (i = 0; i < ncpus; i++) {
+		threads[i] = kthread_run(__find_race, &data, "dmabuf/%d", i);
+		if (IS_ERR(threads[i])) {
+			ncpus = i;
+			break;
+		}
+		atomic_inc(&data.children);
+		get_task_struct(threads[i]);
+	}
+
+	wait_var_event_timeout(&data.children,
+			       !atomic_read(&data.children),
+			       5 * HZ);
+
+	for (i = 0; i < ncpus; i++) {
+		int ret;
+
+		ret = kthread_stop(threads[i]);
+		if (ret && !err)
+			err = ret;
+		put_task_struct(threads[i]);
+	}
+	kfree(threads);
+
+	count = 0;
+	for (i = 0; i < data.fc.chain_length; i++)
+		if (dma_fence_is_signaled(data.fc.fences[i]))
+			count++;
+	pr_info("Completed %lu cycles\n", count);
+
+err:
+	fence_chains_fini(&data.fc);
+	return err;
+}
+
+static int signal_forward(void *arg)
+{
+	struct fence_chains fc;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc);
+	if (err)
+		return err;
+
+	for (i = 0; i < fc.chain_length; i++) {
+		dma_fence_signal(fc.fences[i]);
+
+		if (!dma_fence_is_signaled(fc.chains[i])) {
+			pr_err("chain[%d] not signaled!\n", i);
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (i + 1 < fc.chain_length &&
+		    dma_fence_is_signaled(fc.chains[i + 1])) {
+			pr_err("chain[%d] is signaled!\n", i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static int signal_backward(void *arg)
+{
+	struct fence_chains fc;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64, seqno_inc);
+	if (err)
+		return err;
+
+	for (i = fc.chain_length; i--; ) {
+		dma_fence_signal(fc.fences[i]);
+
+		if (i > 0 && dma_fence_is_signaled(fc.chains[i])) {
+			pr_err("chain[%d] is signaled!\n", i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+	for (i = 0; i < fc.chain_length; i++) {
+		if (!dma_fence_is_signaled(fc.chains[i])) {
+			pr_err("chain[%d] was not signaled!\n", i);
+			err = -EINVAL;
+			goto err;
+		}
+	}
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static int __wait_fence_chains(void *arg)
+{
+	struct fence_chains *fc = arg;
+
+	if (dma_fence_wait(fc->tail, false))
+		return -EIO;
+
+	return 0;
+}
+
+static int wait_forward(void *arg)
+{
+	struct fence_chains fc;
+	struct task_struct *tsk;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+	if (IS_ERR(tsk)) {
+		err = PTR_ERR(tsk);
+		goto err;
+	}
+	get_task_struct(tsk);
+	yield_to(tsk, true);
+
+	for (i = 0; i < fc.chain_length; i++)
+		dma_fence_signal(fc.fences[i]);
+
+	err = kthread_stop(tsk);
+	put_task_struct(tsk);
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static int wait_backward(void *arg)
+{
+	struct fence_chains fc;
+	struct task_struct *tsk;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+	if (IS_ERR(tsk)) {
+		err = PTR_ERR(tsk);
+		goto err;
+	}
+	get_task_struct(tsk);
+	yield_to(tsk, true);
+
+	for (i = fc.chain_length; i--; )
+		dma_fence_signal(fc.fences[i]);
+
+	err = kthread_stop(tsk);
+	put_task_struct(tsk);
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+static void randomise_fences(struct fence_chains *fc)
+{
+	unsigned int count = fc->chain_length;
+
+	/* Fisher-Yates shuffle courtesy of Knuth */
+	while (--count) {
+		unsigned int swp;
+
+		swp = prandom_u32_max(count + 1);
+		if (swp == count)
+			continue;
+
+		swap(fc->fences[count], fc->fences[swp]);
+	}
+}
+
+static int wait_random(void *arg)
+{
+	struct fence_chains fc;
+	struct task_struct *tsk;
+	int err;
+	int i;
+
+	err = fence_chains_init(&fc, 64 << 10, seqno_inc);
+	if (err)
+		return err;
+
+	randomise_fences(&fc);
+
+	tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait");
+	if (IS_ERR(tsk)) {
+		err = PTR_ERR(tsk);
+		goto err;
+	}
+	get_task_struct(tsk);
+	yield_to(tsk, true);
+
+	for (i = 0; i < fc.chain_length; i++)
+		dma_fence_signal(fc.fences[i]);
+
+	err = kthread_stop(tsk);
+	put_task_struct(tsk);
+
+err:
+	fence_chains_fini(&fc);
+	return err;
+}
+
+int dma_fence_chain(void)
+{
+	static const struct subtest tests[] = {
+		SUBTEST(sanitycheck),
+		SUBTEST(find_seqno),
+		SUBTEST(find_signaled),
+		SUBTEST(find_out_of_order),
+		SUBTEST(find_gap),
+		SUBTEST(find_race),
+		SUBTEST(signal_forward),
+		SUBTEST(signal_backward),
+		SUBTEST(wait_forward),
+		SUBTEST(wait_backward),
+		SUBTEST(wait_random),
+	};
+	int ret;
+
+	pr_info("sizeof(dma_fence_chain)=%zu\n",
+		sizeof(struct dma_fence_chain));
+
+	slab_fences = KMEM_CACHE(mock_fence,
+				 SLAB_TYPESAFE_BY_RCU |
+				 SLAB_HWCACHE_ALIGN);
+	if (!slab_fences)
+		return -ENOMEM;
+
+	ret = subtests(tests, NULL);
+
+	kmem_cache_destroy(slab_fences);
+	return ret;
+}
-- 
2.25.1


* [Intel-gfx] [PATCH 17/22] dma-buf: Proxy fence, an unsignaled fence placeholder
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (14 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 16/22] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-03 16:24     ` kbuild test robot
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 18/22] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
                   ` (8 subsequent siblings)
  24 siblings, 1 reply; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

Often we need to create a fence for a future event that has not yet been
associated with a fence. We can store a proxy fence, a placeholder, in
the timeline and replace it later when the real fence is known. Any
listeners that attach to the proxy fence will automatically be signaled
when the real fence completes, and any future listeners will instead be
attached directly to the real fence, avoiding any indirection overhead.
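
For illustration only, a rough sketch (not part of this patch; the slot
handling is simplified and any locking the timeline needs is assumed to be
provided by the caller) of the intended two-step flow:

    /* Sketch only: publish a placeholder now, swap in the real fence later. */
    /* (would need <linux/dma-fence-proxy.h> from this patch)                */
    static void timeline_future_fence(struct dma_fence __rcu **slot,
                                      struct dma_fence *real)
    {
            struct dma_fence *proxy, *old;

            proxy = dma_fence_create_proxy();       /* unsignaled placeholder */
            if (!proxy)
                    return;

            rcu_assign_pointer(*slot, proxy);       /* waiters may attach here */

            /* ... later, once the real fence is known ... */
            old = dma_fence_replace_proxy(slot, real);
            dma_fence_put(old);     /* drop the proxy; its waiters follow @real */
    }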

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/dma-buf/Makefile             |  13 +-
 drivers/dma-buf/dma-fence-private.h  |  20 +
 drivers/dma-buf/dma-fence-proxy.c    | 188 ++++++++++
 drivers/dma-buf/dma-fence.c          |   4 +-
 drivers/dma-buf/selftests.h          |   1 +
 drivers/dma-buf/st-dma-fence-proxy.c | 541 +++++++++++++++++++++++++++
 include/linux/dma-fence-proxy.h      |  20 +
 7 files changed, 783 insertions(+), 4 deletions(-)
 create mode 100644 drivers/dma-buf/dma-fence-private.h
 create mode 100644 drivers/dma-buf/dma-fence-proxy.c
 create mode 100644 drivers/dma-buf/st-dma-fence-proxy.c
 create mode 100644 include/linux/dma-fence-proxy.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 995e05f609ff..afaf6dadd9a3 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,6 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
-	 dma-resv.o seqno-fence.o
+obj-y := \
+	dma-buf.o \
+	dma-fence.o \
+	dma-fence-array.o \
+	dma-fence-chain.o \
+	dma-fence-proxy.o \
+	dma-resv.o \
+	seqno-fence.o
 obj-$(CONFIG_DMABUF_HEAPS)	+= dma-heap.o
 obj-$(CONFIG_DMABUF_HEAPS)	+= heaps/
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
@@ -10,6 +16,7 @@ obj-$(CONFIG_UDMABUF)		+= udmabuf.o
 dmabuf_selftests-y := \
 	selftest.o \
 	st-dma-fence.o \
-	st-dma-fence-chain.o
+	st-dma-fence-chain.o \
+	st-dma-fence-proxy.o
 
 obj-$(CONFIG_DMABUF_SELFTESTS)	+= dmabuf_selftests.o
diff --git a/drivers/dma-buf/dma-fence-private.h b/drivers/dma-buf/dma-fence-private.h
new file mode 100644
index 000000000000..6924d28af0fa
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-private.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Fence mechanism for dma-buf and to allow for asynchronous dma access
+ *
+ * Copyright (C) 2012 Canonical Ltd
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Authors:
+ * Rob Clark <robdclark@gmail.com>
+ * Maarten Lankhorst <maarten.lankhorst@canonical.com>
+ */
+
+#ifndef DMA_FENCE_PRIVATE_H
+#define DMA_FENCE_PRIVATE_H
+
+struct dma_fence;
+
+bool __dma_fence_enable_signaling(struct dma_fence *fence);
+
+#endif /* DMA_FENCE_PRIVATE_H */
diff --git a/drivers/dma-buf/dma-fence-proxy.c b/drivers/dma-buf/dma-fence-proxy.c
new file mode 100644
index 000000000000..2cc0002dc47f
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-proxy.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * dma-fence-proxy: placeholder unsignaled fence
+ *
+ * Copyright (C) 2017-2019 Intel Corporation
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-proxy.h>
+#include <linux/export.h>
+#include <linux/irq_work.h>
+#include <linux/slab.h>
+
+#include "dma-fence-private.h"
+
+struct dma_fence_proxy {
+	struct dma_fence base;
+	spinlock_t lock;
+
+	struct dma_fence *real;
+	struct dma_fence_cb cb;
+	struct irq_work work;
+};
+
+static const char *proxy_get_driver_name(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+	struct dma_fence *real = READ_ONCE(p->real);
+
+	return real ? real->ops->get_driver_name(real) : "proxy";
+}
+
+static const char *proxy_get_timeline_name(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+	struct dma_fence *real = READ_ONCE(p->real);
+
+	return real ? real->ops->get_timeline_name(real) : "unset";
+}
+
+static void proxy_irq_work(struct irq_work *work)
+{
+	struct dma_fence_proxy *p = container_of(work, typeof(*p), work);
+
+	dma_fence_signal(&p->base);
+	dma_fence_put(&p->base);
+}
+
+static void proxy_callback(struct dma_fence *real, struct dma_fence_cb *cb)
+{
+	struct dma_fence_proxy *p = container_of(cb, typeof(*p), cb);
+
+	if (real->error)
+		dma_fence_set_error(&p->base, real->error);
+
+	irq_work_queue(&p->work);
+}
+
+static bool proxy_enable_signaling(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+	struct dma_fence *real = READ_ONCE(p->real);
+	bool ret = true;
+
+	if (real) {
+		spin_lock_nested(real->lock, SINGLE_DEPTH_NESTING);
+		ret = __dma_fence_enable_signaling(real);
+		spin_unlock(real->lock);
+	}
+
+	return ret;
+}
+
+static void proxy_release(struct dma_fence *fence)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+
+	dma_fence_put(p->real);
+	dma_fence_free(&p->base);
+}
+
+static const struct dma_fence_ops dma_fence_proxy_ops = {
+	.get_driver_name = proxy_get_driver_name,
+	.get_timeline_name = proxy_get_timeline_name,
+	.enable_signaling = proxy_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = proxy_release,
+};
+
+/**
+ * dma_fence_create_proxy - Create an unset dma-fence
+ *
+ * dma_fence_create_proxy() creates a new dma_fence stub that is initially
+ * unsignaled and may later be replaced with a real fence. Any listeners
+ * to the proxy fence will be signaled when the target fence signals its
+ * completion.
+ */
+struct dma_fence *dma_fence_create_proxy(void)
+{
+	struct dma_fence_proxy *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	spin_lock_init(&p->lock);
+	dma_fence_init(&p->base, &dma_fence_proxy_ops, &p->lock,
+		       dma_fence_context_alloc(1), 0);
+	init_irq_work(&p->work, proxy_irq_work);
+
+	return &p->base;
+}
+EXPORT_SYMBOL(dma_fence_create_proxy);
+
+static void wrap_signal_locked(struct dma_fence *fence, struct dma_fence *real)
+{
+	if (real->error)
+		dma_fence_set_error(fence, real->error);
+	dma_fence_signal_locked(fence);
+}
+
+static void proxy_assign(struct dma_fence *fence, struct dma_fence *real)
+{
+	struct dma_fence_proxy *p = container_of(fence, typeof(*p), base);
+	unsigned long flags;
+
+	if (WARN_ON(fence == real))
+		return;
+
+	if (WARN_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)))
+		return;
+
+	if (WARN_ON(p->real))
+		return;
+
+	spin_lock_irqsave(p->base.lock, flags);
+
+	if (unlikely(!real)) {
+		dma_fence_signal_locked(&p->base);
+		goto unlock;
+	}
+
+	p->real = dma_fence_get(real);
+
+	spin_lock_nested(real->lock, SINGLE_DEPTH_NESTING);
+	if (dma_fence_is_signaled(real)) {
+		wrap_signal_locked(&p->base, real);
+	} else if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			    &p->base.flags) &&
+		   !__dma_fence_enable_signaling(real)) {
+		wrap_signal_locked(&p->base, real);
+	} else {
+		dma_fence_get(&p->base);
+		p->cb.func = proxy_callback;
+		list_add_tail(&p->cb.node, &real->cb_list);
+	}
+	spin_unlock(real->lock);
+
+unlock:
+	spin_unlock_irqrestore(p->base.lock, flags);
+}
+
+/**
+ * dma_fence_replace_proxy - Replace the proxy fence with the real target
+ * @slot: pointer to location of fence to update
+ * @fence: the new fence to store in @slot
+ *
+ * Once the real dma_fence is known, we can replace the proxy fence holder
+ * with a pointer to the real dma fence. Future listeners will attach to
+ * the real fence, avoiding any indirection overhead. Previous listeners
+ * will remain attached to the proxy fence, and be signaled in turn when
+ * the target fence completes.
+ */
+struct dma_fence *
+dma_fence_replace_proxy(struct dma_fence __rcu **slot, struct dma_fence *fence)
+{
+	struct dma_fence *old;
+
+	if (fence)
+		dma_fence_get(fence);
+
+	old = rcu_replace_pointer(*slot, fence, true);
+	if (old && old->ops == &dma_fence_proxy_ops)
+		proxy_assign(old, fence);
+
+	return old;
+}
+EXPORT_SYMBOL(dma_fence_replace_proxy);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 052a41e2451c..fa7bedc6703d 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -19,6 +19,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/dma_fence.h>
 
+#include "dma-fence-private.h"
+
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
@@ -273,7 +275,7 @@ void dma_fence_free(struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_fence_free);
 
-static bool __dma_fence_enable_signaling(struct dma_fence *fence)
+bool __dma_fence_enable_signaling(struct dma_fence *fence)
 {
 	bool was_set;
 
diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h
index 55918ef9adab..616eca70e2d8 100644
--- a/drivers/dma-buf/selftests.h
+++ b/drivers/dma-buf/selftests.h
@@ -12,3 +12,4 @@
 selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */
 selftest(dma_fence, dma_fence)
 selftest(dma_fence_chain, dma_fence_chain)
+selftest(dma_fence_proxy, dma_fence_proxy)
diff --git a/drivers/dma-buf/st-dma-fence-proxy.c b/drivers/dma-buf/st-dma-fence-proxy.c
new file mode 100644
index 000000000000..4e066b8e5a7f
--- /dev/null
+++ b/drivers/dma-buf/st-dma-fence-proxy.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-proxy.h>
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "selftest.h"
+
+static struct kmem_cache *slab_fences;
+
+static struct mock_fence {
+	struct dma_fence base;
+	spinlock_t lock;
+} *to_mock_fence(struct dma_fence *f) {
+	return container_of(f, struct mock_fence, base);
+}
+
+static const char *mock_name(struct dma_fence *f)
+{
+	return "mock";
+}
+
+static void mock_fence_release(struct dma_fence *f)
+{
+	kmem_cache_free(slab_fences, to_mock_fence(f));
+}
+
+static const struct dma_fence_ops mock_ops = {
+	.get_driver_name = mock_name,
+	.get_timeline_name = mock_name,
+	.release = mock_fence_release,
+};
+
+static struct dma_fence *mock_fence(void)
+{
+	struct mock_fence *f;
+
+	f = kmem_cache_alloc(slab_fences, GFP_KERNEL);
+	if (!f)
+		return NULL;
+
+	spin_lock_init(&f->lock);
+	dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0);
+
+	return &f->base;
+}
+
+static int sanitycheck(void *arg)
+{
+	struct dma_fence *f;
+
+	f = dma_fence_create_proxy();
+	if (!f)
+		return -ENOMEM;
+
+	dma_fence_signal(f);
+	dma_fence_put(f);
+
+	return 0;
+}
+
+struct fences {
+	struct dma_fence *real;
+	struct dma_fence *proxy;
+	struct dma_fence __rcu *slot;
+};
+
+static int create_fences(struct fences *f, bool attach)
+{
+	f->proxy = dma_fence_create_proxy();
+	if (!f->proxy)
+		return -ENOMEM;
+
+	RCU_INIT_POINTER(f->slot, f->proxy);
+
+	f->real = mock_fence();
+	if (!f->real) {
+		dma_fence_put(f->proxy);
+		return -ENOMEM;
+	}
+
+	if (attach)
+		dma_fence_replace_proxy(&f->slot, f->real);
+
+	return 0;
+}
+
+static void free_fences(struct fences *f)
+{
+	dma_fence_put(dma_fence_replace_proxy(&f->slot, NULL));
+	dma_fence_put(f->real);
+	dma_fence_put(f->proxy);
+}
+
+static int wrap_signaling(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence unexpectedly signaled on creation\n");
+		goto err_free;
+	}
+
+	if (dma_fence_signal(f.real)) {
+		pr_err("Fence reported being already signaled\n");
+		goto err_free;
+	}
+
+	if (!dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence not reporting signaled\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_signaling_recurse(void *arg)
+{
+	struct fences f;
+	struct dma_fence *chain;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	chain = dma_fence_create_proxy();
+	if (!chain) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, chain);
+	dma_fence_put(dma_fence_replace_proxy(&f.slot, f.real));
+	dma_fence_put(chain);
+
+	/* f.real <- chain <- f.proxy */
+
+	if (dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence unexpectedly signaled on creation\n");
+		goto err_free;
+	}
+
+	if (dma_fence_signal(f.real)) {
+		pr_err("Fence reported being already signaled\n");
+		goto err_free;
+	}
+
+	if (!dma_fence_is_signaled(f.proxy)) {
+		pr_err("Fence not reporting signaled\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+struct simple_cb {
+	struct dma_fence_cb cb;
+	bool seen;
+};
+
+static void simple_callback(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	smp_store_mb(container_of(cb, struct simple_cb, cb)->seen, true);
+}
+
+static int wrap_add_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_late_add_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	dma_fence_signal(f.real);
+
+	if (!dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Added callback, but fence was already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (cb.seen) {
+		pr_err("Callback called after failed attachment!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_early_add_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, f.real);
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_early_add_callback_late(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	dma_fence_signal(f.real);
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, f.real);
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_early_add_callback_early(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, false))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_replace_proxy(&f.slot, f.real);
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_rm_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	if (!dma_fence_remove_callback(f.proxy, &cb.cb)) {
+		pr_err("Failed to remove callback!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (cb.seen) {
+		pr_err("Callback still signaled after removal!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_late_rm_callback(void *arg)
+{
+	struct simple_cb cb = {};
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) {
+		pr_err("Failed to add callback, fence already signaled!\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!cb.seen) {
+		pr_err("Callback failed!\n");
+		goto err_free;
+	}
+
+	if (dma_fence_remove_callback(f.proxy, &cb.cb)) {
+		pr_err("Callback removal succeeded after being executed!\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_status(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_get_status(f.proxy)) {
+		pr_err("Fence unexpectedly has signaled status on creation\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (!dma_fence_get_status(f.proxy)) {
+		pr_err("Fence not reporting signaled status\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_error(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	dma_fence_set_error(f.real, -EIO);
+
+	if (dma_fence_get_status(f.proxy)) {
+		pr_err("Fence unexpectedly has error status before signal\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+	if (dma_fence_get_status(f.proxy) != -EIO) {
+		pr_err("Fence not reporting error status, got %d\n",
+		       dma_fence_get_status(f.proxy));
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	free_fences(&f);
+	return err;
+}
+
+static int wrap_wait(void *arg)
+{
+	struct fences f;
+	int err = -EINVAL;
+
+	if (create_fences(&f, true))
+		return -ENOMEM;
+
+	if (dma_fence_wait_timeout(f.proxy, false, 0) != 0) {
+		pr_err("Wait reported complete before being signaled\n");
+		goto err_free;
+	}
+
+	dma_fence_signal(f.real);
+
+	if (dma_fence_wait_timeout(f.proxy, false, 0) == 0) {
+		pr_err("Wait reported incomplete after being signaled\n");
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	dma_fence_signal(f.real);
+	free_fences(&f);
+	return err;
+}
+
+struct wait_timer {
+	struct timer_list timer;
+	struct fences f;
+};
+
+static void wait_timer(struct timer_list *timer)
+{
+	struct wait_timer *wt = from_timer(wt, timer, timer);
+
+	dma_fence_signal(wt->f.real);
+}
+
+static int wrap_wait_timeout(void *arg)
+{
+	struct wait_timer wt;
+	int err = -EINVAL;
+
+	if (create_fences(&wt.f, true))
+		return -ENOMEM;
+
+	timer_setup_on_stack(&wt.timer, wait_timer, 0);
+
+	if (dma_fence_wait_timeout(wt.f.proxy, false, 1) != 0) {
+		pr_err("Wait reported complete before being signaled\n");
+		goto err_free;
+	}
+
+	mod_timer(&wt.timer, jiffies + 1);
+
+	if (dma_fence_wait_timeout(wt.f.proxy, false, 2) != 0) {
+		if (timer_pending(&wt.timer)) {
+			pr_notice("Timer did not fire within the jiffie!\n");
+			err = 0; /* not our fault! */
+		} else {
+			pr_err("Wait reported incomplete after timeout\n");
+		}
+		goto err_free;
+	}
+
+	err = 0;
+err_free:
+	del_timer_sync(&wt.timer);
+	destroy_timer_on_stack(&wt.timer);
+	dma_fence_signal(wt.f.real);
+	free_fences(&wt.f);
+	return err;
+}
+
+int dma_fence_proxy(void)
+{
+	static const struct subtest tests[] = {
+		SUBTEST(sanitycheck),
+		SUBTEST(wrap_signaling),
+		SUBTEST(wrap_signaling_recurse),
+		SUBTEST(wrap_add_callback),
+		SUBTEST(wrap_late_add_callback),
+		SUBTEST(wrap_early_add_callback),
+		SUBTEST(wrap_early_add_callback_late),
+		SUBTEST(wrap_early_add_callback_early),
+		SUBTEST(wrap_rm_callback),
+		SUBTEST(wrap_late_rm_callback),
+		SUBTEST(wrap_status),
+		SUBTEST(wrap_error),
+		SUBTEST(wrap_wait),
+		SUBTEST(wrap_wait_timeout),
+	};
+	int ret;
+
+	slab_fences = KMEM_CACHE(mock_fence,
+				 SLAB_TYPESAFE_BY_RCU |
+				 SLAB_HWCACHE_ALIGN);
+	if (!slab_fences)
+		return -ENOMEM;
+
+	ret = subtests(tests, NULL);
+
+	kmem_cache_destroy(slab_fences);
+
+	return ret;
+}
diff --git a/include/linux/dma-fence-proxy.h b/include/linux/dma-fence-proxy.h
new file mode 100644
index 000000000000..587d5044f0bf
--- /dev/null
+++ b/include/linux/dma-fence-proxy.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * dma-fence-proxy: allows waiting upon unset and future fences
+ *
+ * Copyright (C) 2017 Intel Corporation
+ */
+
+#ifndef __LINUX_DMA_FENCE_PROXY_H
+#define __LINUX_DMA_FENCE_PROXY_H
+
+#include <linux/kernel.h>
+
+struct dma_fence;
+
+struct dma_fence *dma_fence_create_proxy(void);
+
+struct dma_fence *
+dma_fence_replace_proxy(struct dma_fence __rcu **slot, struct dma_fence *fence);
+
+#endif /* __LINUX_DMA_FENCE_PROXY_H */
-- 
2.25.1


* [Intel-gfx] [PATCH 18/22] drm/syncobj: Allow use of dma-fence-proxy
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (15 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 17/22] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 19/22] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
                   ` (7 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

Allow the callers to supply a dma-fence-proxy for asynchronous waiting on
future fences.
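
For illustration only, a rough sketch (not part of this patch; the helper name
is made up and error handling is omitted) of what this enables for a syncobj:

    /* Sketch only: park a proxy in a syncobj now, forward its waiters later. */
    /* (would need <drm/drm_syncobj.h> and <linux/dma-fence-proxy.h>)          */
    static void syncobj_future_fence(struct drm_syncobj *syncobj,
                                     struct dma_fence *real)
    {
            struct dma_fence *proxy = dma_fence_create_proxy();

            drm_syncobj_replace_fence(syncobj, proxy); /* waiters may attach */
            dma_fence_put(proxy);           /* syncobj holds its own reference */

            /* ... once the real fence materialises ... */
            drm_syncobj_replace_fence(syncobj, real); /* proxy waiters follow @real */
    }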

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_syncobj.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 42d46414f767..e141db0e1eb6 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -184,6 +184,7 @@
  */
 
 #include <linux/anon_inodes.h>
+#include <linux/dma-fence-proxy.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sched/signal.h>
@@ -324,14 +325,9 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	struct dma_fence *old_fence;
 	struct syncobj_wait_entry *cur, *tmp;
 
-	if (fence)
-		dma_fence_get(fence);
-
 	spin_lock(&syncobj->lock);
 
-	old_fence = rcu_dereference_protected(syncobj->fence,
-					      lockdep_is_held(&syncobj->lock));
-	rcu_assign_pointer(syncobj->fence, fence);
+	old_fence = dma_fence_replace_proxy(&syncobj->fence, fence);
 
 	if (fence != old_fence) {
 		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
-- 
2.25.1


* [Intel-gfx] [PATCH 19/22] drm/i915/gem: Teach execbuf how to wait on future syncobj
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (16 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 18/22] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 20/22] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
                   ` (6 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

If a syncobj has not yet been assigned a fence, treat it as a future
fence and install and wait upon a dma-fence-proxy. The proxy will be
replaced by the real fence later, and that fence will then be
responsible for signaling our waiter.
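
For illustration only, the new install-or-adopt step can be read as the
following rough helper (not part of the patch; it merely restates the execbuf
logic in the hunk below):

    /* Sketch only: return the syncobj's fence, installing a proxy if unset. */
    static struct dma_fence *get_or_install_proxy(struct drm_syncobj *syncobj)
    {
            struct dma_fence *fence, *old;

            fence = drm_syncobj_fence_get(syncobj);
            if (fence)
                    return fence;

            fence = dma_fence_create_proxy();
            if (!fence)
                    return NULL;

            spin_lock(&syncobj->lock);
            old = rcu_dereference_protected(syncobj->fence, true);
            if (unlikely(old)) {            /* raced with a real fence */
                    dma_fence_put(fence);
                    fence = dma_fence_get(old);
            } else {
                    rcu_assign_pointer(syncobj->fence, dma_fence_get(fence));
            }
            spin_unlock(&syncobj->lock);

            return fence;
    }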

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 577c84872acc..327db9e6bf5b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/intel-iommu.h>
+#include <linux/dma-fence-proxy.h>
 #include <linux/dma-resv.h>
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
@@ -2495,8 +2496,24 @@ await_fence_array(struct i915_execbuffer *eb,
 			continue;
 
 		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
+		if (!fence) {
+			struct dma_fence *old;
+
+			fence = dma_fence_create_proxy();
+			if (!fence)
+				return -ENOMEM;
+
+			spin_lock(&syncobj->lock);
+			old = rcu_dereference_protected(syncobj->fence, true);
+			if (unlikely(old)) {
+				dma_fence_put(fence);
+				fence = dma_fence_get(old);
+			} else {
+				rcu_assign_pointer(syncobj->fence,
+						   dma_fence_get(fence));
+			}
+			spin_unlock(&syncobj->lock);
+		}
 
 		err = i915_request_await_dma_fence(eb->request, fence);
 		dma_fence_put(fence);
-- 
2.25.1


* [Intel-gfx] [PATCH 20/22] drm/i915/gem: Allow combining submit-fences with syncobj
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (17 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 19/22] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 21/22] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
                   ` (5 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx

Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++++++---
 include/uapi/drm/i915_drm.h                    |  7 ++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 327db9e6bf5b..8ef4d5316fcc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2460,7 +2460,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n] = ptr_pack_bits(syncobj, fence.flags, 3);
 	}
 
 	return fences;
@@ -2491,7 +2491,7 @@ await_fence_array(struct i915_execbuffer *eb,
 		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n], &flags, 3);
 		if (!(flags & I915_EXEC_FENCE_WAIT))
 			continue;
 
@@ -2515,7 +2515,11 @@ await_fence_array(struct i915_execbuffer *eb,
 			spin_unlock(&syncobj->lock);
 		}
 
-		err = i915_request_await_dma_fence(eb->request, fence);
+		if (flags & I915_EXEC_FENCE_WAIT_SUBMIT)
+			err = i915_request_await_execution(eb->request, fence,
+							   eb->engine->bond_execute);
+		else
+			err = i915_request_await_dma_fence(eb->request, fence);
 		dma_fence_put(fence);
 		if (err < 0)
 			return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2813e579b480..3a24817ca25b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1040,9 +1040,10 @@ struct drm_i915_gem_exec_fence {
 	 */
 	__u32 handle;
 
-#define I915_EXEC_FENCE_WAIT            (1<<0)
-#define I915_EXEC_FENCE_SIGNAL          (1<<1)
-#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
+#define I915_EXEC_FENCE_WAIT            (1u << 0)
+#define I915_EXEC_FENCE_SIGNAL          (1u << 1)
+#define I915_EXEC_FENCE_WAIT_SUBMIT     (1u << 2)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_WAIT_SUBMIT << 1))
 	__u32 flags;
 };
 
-- 
2.25.1


* [Intel-gfx] [PATCH 21/22] drm/i915/gt: Declare when we enabled timeslicing
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (18 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 20/22] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 22/22] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
                   ` (4 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke

Let userspace know whether it can trust timeslicing by reporting it as
part of I915_PARAM_HAS_SCHEDULER::I915_SCHEDULER_CAP_TIMESLICING.

v2: Only declare timeslicing if we can safely preempt userspace.
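
For illustration only, a hypothetical userspace probe (not part of this patch;
it assumes the standard libdrm ioctl wrapper):

    #include <stdbool.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static bool has_timeslicing(int fd)
    {
            int caps = 0;
            struct drm_i915_getparam gp = {
                    .param = I915_PARAM_HAS_SCHEDULER,
                    .value = &caps,
            };

            /* caps carries the I915_SCHEDULER_CAP_* bits on success */
            if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                    return false;

            return caps & I915_SCHEDULER_CAP_TIMESLICING;
    }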

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/gt/intel_engine.h      | 3 ++-
 drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 +++++
 include/uapi/drm/i915_drm.h                 | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 29c8c03c5caa..a32dc82a90d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -326,7 +326,8 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine)
 	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
 		return false;
 
-	return intel_engine_has_semaphores(engine);
+	return (intel_engine_has_semaphores(engine) &&
+		intel_engine_has_preemption(engine));
 }
 
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index 848decee9066..b84fdd722781 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -121,6 +121,11 @@ static void set_scheduler_caps(struct drm_i915_private *i915)
 			else
 				disabled |= BIT(map[i].sched);
 		}
+
+		if (intel_engine_has_timeslices(engine))
+			enabled |= I915_SCHEDULER_CAP_TIMESLICING;
+		else
+			disabled |= I915_SCHEDULER_CAP_TIMESLICING;
 	}
 
 	i915->caps.scheduler = enabled & ~disabled;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a24817ca25b..ff7be293ec31 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -523,6 +523,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
+#define   I915_SCHEDULER_CAP_TIMESLICING	(1ul << 5)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
2.25.1


* [Intel-gfx] [PATCH 22/22] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (19 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 21/22] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
@ 2020-03-02  8:58 ` Chris Wilson
  2020-03-02 13:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release Patchwork
                   ` (3 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02  8:58 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke

If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  6 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c       | 13 ++++++-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 40 +++++++++++++++++---
 drivers/gpu/drm/i915/i915_reg.h              |  1 +
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 53ac3f00909a..c93a805eddfb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1290,6 +1290,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 
 	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+	if (HAS_EXECLISTS(dev_priv)) {
+		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+	}
 	drm_printf(m, "\tRING_START: 0x%08x\n",
 		   ENGINE_READ(engine, RING_START));
 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4dfaaaa32e32..1ace7cdf332f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
 	 */
 	struct i915_priolist default_priolist;
 
+	/**
+	 * @yield: CCID at the time of the last semaphore-wait interrupt.
+	 *
+	 * Instead of leaving a semaphore busy-spinning on an engine, we would
+	 * like to switch to another ready context, i.e. yielding the semaphore
+	 * timeslice.
+	 */
+	u32 yield;
+
 	/**
 	 * @error_interrupt: CS Master EIR
 	 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..875bd0392ffc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 		}
 	}
 
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		tasklet = true;
 
@@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	struct intel_uncore *uncore = gt->uncore;
 	const u32 dmask = irqs << 16 | irqs;
 	const u32 smask = irqs << 16;
@@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	const u32 gt_interrupts[] = {
 		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
 		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c70dc4cfc4f3..e717bc644700 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1749,7 +1749,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+	       const struct i915_request *rq)
 {
 	int hint;
 
@@ -1765,6 +1766,31 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+		const struct i915_request *rq)
+{
+	/*
+	 * Once bitten, forever smitten!
+	 *
+	 * If the active context ever busy-waited on a semaphore,
+	 * it will be treated as a hog until the end of its timeslice.
+	 * The HW only sends an interrupt on the first miss, and we
+	 * do not know if that semaphore has been signaled, or even if it
+	 * is now stuck on another semaphore. Play safe, yield if it
+	 * might be stuck -- it will be given a fresh timeslice in
+	 * the near future.
+	 */
+	return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+		  const struct i915_request *rq)
+{
+	return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1780,8 +1806,7 @@ timeslice(const struct intel_engine_cs *engine)
 	return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
 	const struct i915_request *rq = *engine->execlists.active;
 
@@ -1994,13 +2019,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   timer_expired(&engine->execlists.timer)) {
+			   timeslice_expired(execlists, last)) {
 			ENGINE_TRACE(engine,
-				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
+				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
 				     last->fence.context,
 				     last->fence.seqno,
 				     last->sched.attr.priority,
-				     execlists->queue_priority_hint);
+				     execlists->queue_priority_hint,
+				     yesno(timeslice_yield(execlists, last)));
 
 			ring_set_paused(engine, 1);
 			defer_active(engine);
@@ -2262,6 +2288,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 		clear_ports(port + 1, last_port - port);
 
+		WRITE_ONCE(execlists->yield, -1);
 		execlists_submit_ports(engine);
 		set_preempt_timeout(engine);
 	} else {
@@ -4528,6 +4555,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 72de9591f77f..7677016cf18c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3092,6 +3092,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
-- 
2.25.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 06/22] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 06/22] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl Chris Wilson
@ 2020-03-02  9:39   ` Maarten Lankhorst
  0 siblings, 0 replies; 37+ messages in thread
From: Maarten Lankhorst @ 2020-03-02  9:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 02-03-2020 at 09:57, Chris Wilson wrote:
> As we no longer stash anything inside i915_vma under the exclusive
> protection of struct_mutex, we do not need to revoke the i915_vma
> stashes before dropping struct_mutex to handle pagefaults. Knowing that
> we must drop the struct_mutex while keeping the eb->vma around, means
> that we are required to hold onto the object reference until we have
> marked the vma as active.
>
> Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

I can work with this,

For patches 4, 5 and 6

Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>


> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 107 +++++++-----------
>  1 file changed, 42 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 9c888561a636..577c84872acc 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -47,17 +47,15 @@ enum {
>  #define DBG_FORCE_RELOC 0 /* choose one of the above! */
>  };
>  
> -#define __EXEC_OBJECT_HAS_REF		BIT(31)
> -#define __EXEC_OBJECT_HAS_PIN		BIT(30)
> -#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
> -#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
> -#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
> -#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
> +#define __EXEC_OBJECT_HAS_PIN		BIT(31)
> +#define __EXEC_OBJECT_HAS_FENCE		BIT(30)
> +#define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
> +#define __EXEC_OBJECT_NEEDS_BIAS	BIT(28)
> +#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 28) /* all of the above */
>  #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
>  
>  #define __EXEC_HAS_RELOC	BIT(31)
> -#define __EXEC_VALIDATED	BIT(30)
> -#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
> +#define __EXEC_INTERNAL_FLAGS	(~0u << 31)
>  #define UPDATE			PIN_OFFSET_FIXED
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -472,24 +470,17 @@ eb_validate_vma(struct i915_execbuffer *eb,
>  	return 0;
>  }
>  
> -static int
> +static void
>  eb_add_vma(struct i915_execbuffer *eb,
>  	   unsigned int i, unsigned batch_idx,
>  	   struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
>  	struct eb_vma *ev = &eb->vma[i];
> -	int err;
>  
>  	GEM_BUG_ON(i915_vma_is_closed(vma));
>  
> -	if (!(eb->args->flags & __EXEC_VALIDATED)) {
> -		err = eb_validate_vma(eb, entry, vma);
> -		if (unlikely(err))
> -			return err;
> -	}
> -
> -	ev->vma = vma;
> +	ev->vma = i915_vma_get(vma);
>  	ev->exec = entry;
>  	ev->flags = entry->flags;
>  
> @@ -522,7 +513,6 @@ eb_add_vma(struct i915_execbuffer *eb,
>  		eb->batch = ev;
>  	}
>  
> -	err = 0;
>  	if (eb_pin_vma(eb, entry, ev)) {
>  		if (entry->offset != vma->node.start) {
>  			entry->offset = vma->node.start | UPDATE;
> @@ -530,12 +520,8 @@ eb_add_vma(struct i915_execbuffer *eb,
>  		}
>  	} else {
>  		eb_unreserve_vma(ev);
> -
>  		list_add_tail(&ev->bind_link, &eb->unbound);
> -		if (drm_mm_node_allocated(&vma->node))
> -			err = i915_vma_unbind(vma);
>  	}
> -	return err;
>  }
>  
>  static inline int use_cpu_reloc(const struct reloc_cache *cache,
> @@ -582,6 +568,13 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
>  	else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
>  		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
>  
> +	if (drm_mm_node_allocated(&vma->node) &&
> +	    eb_vma_misplaced(entry, vma, ev->flags)) {
> +		err = i915_vma_unbind(vma);
> +		if (err)
> +			return err;
> +	}
> +
>  	err = i915_vma_pin(vma,
>  			   entry->pad_to_size, entry->alignment,
>  			   pin_flags);
> @@ -641,7 +634,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
>  			if (err)
>  				break;
>  		}
> -		if (err != -ENOSPC)
> +		if (!(err == -ENOSPC || err == -EAGAIN))
>  			return err;
>  
>  		/* Resort *all* the objects into priority order */
> @@ -672,6 +665,11 @@ static int eb_reserve(struct i915_execbuffer *eb)
>  		}
>  		list_splice_tail(&last, &eb->unbound);
>  
> +		if (err == -EAGAIN) {
> +			flush_workqueue(eb->i915->mm.userptr_wq);
> +			continue;
> +		}
> +
>  		switch (pass++) {
>  		case 0:
>  			break;
> @@ -727,17 +725,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  	unsigned int i, batch;
>  	int err;
>  
> +	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
> +		return -ENOENT;
> +
>  	INIT_LIST_HEAD(&eb->relocs);
>  	INIT_LIST_HEAD(&eb->unbound);
>  
>  	batch = eb_batch_index(eb);
>  
> -	mutex_lock(&eb->gem_context->mutex);
> -	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
> -		err = -ENOENT;
> -		goto err_ctx;
> -	}
> -
>  	for (i = 0; i < eb->buffer_count; i++) {
>  		u32 handle = eb->exec[i].handle;
>  		struct i915_lut_handle *lut;
> @@ -782,25 +777,19 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  		i915_gem_object_unlock(obj);
>  
>  add_vma:
> -		err = eb_add_vma(eb, i, batch, vma);
> +		err = eb_validate_vma(eb, &eb->exec[i], vma);
>  		if (unlikely(err))
>  			goto err_vma;
>  
> -		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
> -			   eb_vma_misplaced(&eb->exec[i], vma, eb->vma[i].flags));
> +		eb_add_vma(eb, i, batch, vma);
>  	}
>  
> -	mutex_unlock(&eb->gem_context->mutex);
> -
> -	eb->args->flags |= __EXEC_VALIDATED;
> -	return eb_reserve(eb);
> +	return 0;
>  
>  err_obj:
>  	i915_gem_object_put(obj);
>  err_vma:
>  	eb->vma[i].vma = NULL;
> -err_ctx:
> -	mutex_unlock(&eb->gem_context->mutex);
>  	return err;
>  }
>  
> @@ -841,19 +830,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
>  		if (ev->flags & __EXEC_OBJECT_HAS_PIN)
>  			__eb_unreserve_vma(vma, ev->flags);
>  
> -		if (ev->flags & __EXEC_OBJECT_HAS_REF)
> -			i915_vma_put(vma);
> +		i915_vma_put(vma);
>  	}
>  }
>  
> -static void eb_reset_vmas(const struct i915_execbuffer *eb)
> -{
> -	eb_release_vmas(eb);
> -	if (eb->lut_size > 0)
> -		memset(eb->buckets, 0,
> -		       sizeof(struct hlist_head) << eb->lut_size);
> -}
> -
>  static void eb_destroy(const struct i915_execbuffer *eb)
>  {
>  	GEM_BUG_ON(eb->reloc_cache.rq);
> @@ -1662,8 +1642,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>  		goto out;
>  	}
>  
> -	/* We may process another execbuffer during the unlock... */
> -	eb_reset_vmas(eb);
>  	mutex_unlock(&dev->struct_mutex);
>  
>  	/*
> @@ -1702,11 +1680,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>  		goto out;
>  	}
>  
> -	/* reacquire the objects */
> -	err = eb_lookup_vmas(eb);
> -	if (err)
> -		goto err;
> -
>  	GEM_BUG_ON(!eb->batch);
>  
>  	list_for_each_entry(ev, &eb->relocs, reloc_link) {
> @@ -1757,8 +1730,17 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>  
>  static int eb_relocate(struct i915_execbuffer *eb)
>  {
> -	if (eb_lookup_vmas(eb))
> -		goto slow;
> +	int err;
> +
> +	mutex_lock(&eb->gem_context->mutex);
> +	err = eb_lookup_vmas(eb);
> +	mutex_unlock(&eb->gem_context->mutex);
> +	if (err)
> +		return err;
> +
> +	err = eb_reserve(eb);
> +	if (err)
> +		return err;
>  
>  	/* The objects are in their final locations, apply the relocations. */
>  	if (eb->args->flags & __EXEC_HAS_RELOC) {
> @@ -1766,14 +1748,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
>  
>  		list_for_each_entry(ev, &eb->relocs, reloc_link) {
>  			if (eb_relocate_vma(eb, ev))
> -				goto slow;
> +				return eb_relocate_slow(eb);
>  		}
>  	}
>  
>  	return 0;
> -
> -slow:
> -	return eb_relocate_slow(eb);
>  }
>  
>  static int eb_move_to_gpu(struct i915_execbuffer *eb)
> @@ -1855,8 +1834,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>  		i915_vma_unlock(vma);
>  
>  		__eb_unreserve_vma(vma, flags);
> -		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
> -			i915_vma_put(vma);
> +		i915_vma_put(vma);
>  
>  		ev->vma = NULL;
>  	}
> @@ -2116,8 +2094,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>  		goto err_trampoline;
>  
>  	eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
> -	eb->vma[eb->buffer_count].flags =
> -		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
> +	eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
>  	eb->batch = &eb->vma[eb->buffer_count++];
>  
>  	eb->trampoline = trampoline;


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 07/22] drm/i915/perf: Reintroduce wait on OA configuration completion
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 07/22] drm/i915/perf: Reintroduce wait on OA configuration completion Chris Wilson
@ 2020-03-02 10:29   ` Lionel Landwerlin
  0 siblings, 0 replies; 37+ messages in thread
From: Lionel Landwerlin @ 2020-03-02 10:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 02/03/2020 10:57, Chris Wilson wrote:
> We still need to wait for the initial OA configuration to happen
> before we enable OA report writes to the OA buffer.
>
> Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Fixes: 15d0ace1f876 ("drm/i915/perf: execute OA configuration from command stream")
> Testcase: igt/perf/stream-open-close
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>


Works for me.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>


Thanks,


-Lionel


> ---
> This is simply an alternative to storing the request inside the stream.
> ---
>   drivers/gpu/drm/i915/i915_perf.c       | 58 ++++++++++++++++++--------
>   drivers/gpu/drm/i915/i915_perf_types.h |  3 +-
>   2 files changed, 43 insertions(+), 18 deletions(-)
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (20 preceding siblings ...)
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 22/22] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
@ 2020-03-02 13:58 ` Patchwork
  2020-03-02 14:18 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
                   ` (2 subsequent siblings)
  24 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2020-03-02 13:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release
URL   : https://patchwork.freedesktop.org/series/74131/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
15d2ef066485 drm/i915/gem: Consolidate ctx->engines[] release
efe30d6b961f drm/i915/gt: Prevent allocation on a banned context
273e8c952a87 drm/i915/gem: Check that the context wasn't closed during setup
a9e51e63509e drm/i915: Drop inspection of execbuf flags during evict
abe716d0c7b1 drm/i915/gem: Extract transient execbuf flags from i915_vma
785f2f840a4b drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
4bc4e5b8bf9b drm/i915/perf: Reintroduce wait on OA configuration completion
80532d70b271 drm/i915: Wrap i915_active in a simple kreffed struct
d9379944ec2d drm/i915: Extend i915_request_await_active to use all timelines
c217c503b903 drm/i915/perf: Schedule oa_config after modifying the contexts
44ec5f8e5c7c drm/i915/selftests: Add request throughput measurement to perf
-:90: WARNING:LINE_SPACING: Missing a blank line after declarations
#90: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1515:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:157: WARNING:LINE_SPACING: Missing a blank line after declarations
#157: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1582:
+	struct intel_context *ce;
+	IGT_TIMEOUT(end_time);

-:213: WARNING:LINE_SPACING: Missing a blank line after declarations
#213: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1638:
+	struct drm_i915_private *i915 = arg;
+	static int (* const func[])(void *arg) = {

-:220: WARNING:LINE_SPACING: Missing a blank line after declarations
#220: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1645:
+	struct intel_engine_cs *engine;
+	int (* const *fn)(void *arg);

-:265: WARNING:YIELD: Using yield() is generally wrong. See yield() kernel-doc (sched/core.c)
#265: FILE: drivers/gpu/drm/i915/selftests/i915_request.c:1690:
+		yield(); /* start all threads before we kthread_stop() */

total: 0 errors, 5 warnings, 0 checks, 306 lines checked
d92fc6dcd157 drm/i915/execlists: Check the sentinel is alone in the ELSP
304b7730d4f7 drm/i915/execlists: Reduce preempt-to-busy roundtrip delay
ec6462245001 dma-buf: Prettify typecasts for dma-fence-chain
0c794e11ae51 dma-buf: Report signaled links inside dma-fence-chain
7a77040c1c21 dma-buf: Exercise dma-fence-chain under selftests
-:33: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#33: 
new file mode 100644

-:61: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#61: FILE: drivers/dma-buf/st-dma-fence-chain.c:24:
+	spinlock_t lock;

-:235: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#235: FILE: drivers/dma-buf/st-dma-fence-chain.c:198:
+		pr_err("Reported %d for find_seqno(0)!\n", err);

-:244: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#244: FILE: drivers/dma-buf/st-dma-fence-chain.c:207:
+			pr_err("Reported %d for find_seqno(%d:%d)!\n",

-:249: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#249: FILE: drivers/dma-buf/st-dma-fence-chain.c:212:
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",

-:272: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#272: FILE: drivers/dma-buf/st-dma-fence-chain.c:235:
+			pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n",

-:286: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'find_seqno', this function's name, in a string
#286: FILE: drivers/dma-buf/st-dma-fence-chain.c:249:
+			pr_err("Incorrect fence reported by find_seqno(%d:%d)\n",

-:737: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'dma_fence_chain', this function's name, in a string
#737: FILE: drivers/dma-buf/st-dma-fence-chain.c:700:
+	pr_info("sizeof(dma_fence_chain)=%zu\n",

total: 0 errors, 7 warnings, 1 checks, 725 lines checked
2f9c8ffce272 dma-buf: Proxy fence, an unsignaled fence placeholder
-:45: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#45: 
new file mode 100644

-:93: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#93: FILE: drivers/dma-buf/dma-fence-proxy.c:18:
+	spinlock_t lock;

-:320: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#320: FILE: drivers/dma-buf/st-dma-fence-proxy.c:20:
+	spinlock_t lock;

-:480: WARNING:MEMORY_BARRIER: memory barrier without comment
#480: FILE: drivers/dma-buf/st-dma-fence-proxy.c:180:
+	smp_store_mb(container_of(cb, struct simple_cb, cb)->seen, true);

total: 0 errors, 2 warnings, 2 checks, 811 lines checked
e95e336bec8e drm/syncobj: Allow use of dma-fence-proxy
36dadb2f23d6 drm/i915/gem: Teach execbuf how to wait on future syncobj
a4595a6bbbfb drm/i915/gem: Allow combining submit-fences with syncobj
0fe67463719f drm/i915/gt: Declare when we enabled timeslicing
9d36cbcd1c36 drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 12/22] drm/i915/execlists: Check the sentinel is alone in the ELSP
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 12/22] drm/i915/execlists: Check the sentinel is alone in the ELSP Chris Wilson
@ 2020-03-02 14:04   ` Mika Kuoppala
  0 siblings, 0 replies; 37+ messages in thread
From: Mika Kuoppala @ 2020-03-02 14:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We only use sentinel requests for "preempt-to-idle" passes, so assert
> that they are the only request in a new submission.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

From other thread,

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 15d56cd3937e..b9b3f78f1324 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1448,6 +1448,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
>  {
>  	struct i915_request * const *port, *rq;
>  	struct intel_context *ce = NULL;
> +	bool sentinel = false;
>  
>  	trace_ports(execlists, msg, execlists->pending);
>  
> @@ -1481,6 +1482,26 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
>  		}
>  		ce = rq->context;
>  
> +		/*
> +		 * Sentinels are supposed to be lonely so they flush the
> +		 * current exection off the HW. Check that they are the
> +		 * only request in the pending submission.
> +		 */
> +		if (sentinel) {
> +			GEM_TRACE_ERR("context:%llx after sentinel in pending[%zd]\n",
> +				      ce->timeline->fence_context,
> +				      port - execlists->pending);
> +			return false;
> +		}
> +
> +		sentinel = i915_request_has_sentinel(rq);
> +		if (sentinel && port != execlists->pending) {
> +			GEM_TRACE_ERR("sentinel context:%llx not in prime position[%zd]\n",
> +				      ce->timeline->fence_context,
> +				      port - execlists->pending);
> +			return false;
> +		}
> +
>  		/* Hold tightly onto the lock to prevent concurrent retires! */
>  		if (!spin_trylock_irqsave(&rq->lock, flags))
>  			continue;
> -- 
> 2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✗ Fi.CI.DOCS: warning for series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (21 preceding siblings ...)
  2020-03-02 13:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release Patchwork
@ 2020-03-02 14:18 ` Patchwork
  2020-03-02 14:24 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
  2020-03-02 18:29 ` [Intel-gfx] [PATCH 01/22] " Mika Kuoppala
  24 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2020-03-02 14:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release
URL   : https://patchwork.freedesktop.org/series/74131/
State : warning

== Summary ==

$ make htmldocs 2>&1 > /dev/null | grep i915
Error: Cannot open file ./drivers/gpu/drm/i915/intel_csr.c
Error: Cannot open file ./drivers/gpu/drm/i915/intel_csr.c
Error: Cannot open file ./drivers/gpu/drm/i915/intel_csr.c
./drivers/gpu/drm/i915/i915_vma.h:1: warning: 'Virtual Memory Address' not found
./drivers/gpu/drm/i915/i915_gem_gtt.c:1: warning: 'Global GTT views' not found
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -function csr support for dmc ./drivers/gpu/drm/i915/intel_csr.c' failed with return code 1
WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -internal ./drivers/gpu/drm/i915/intel_csr.c' failed with return code 2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (22 preceding siblings ...)
  2020-03-02 14:18 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
@ 2020-03-02 14:24 ` Patchwork
  2020-03-02 18:29 ` [Intel-gfx] [PATCH 01/22] " Mika Kuoppala
  24 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2020-03-02 14:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release
URL   : https://patchwork.freedesktop.org/series/74131/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8043 -> Patchwork_16777
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_16777 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_16777, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_16777:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@blt:
    - fi-snb-2600:        [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8043/fi-snb-2600/igt@i915_selftest@live@blt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/fi-snb-2600/igt@i915_selftest@live@blt.html

  
New tests
---------

  New tests have been introduced between CI_DRM_8043 and Patchwork_16777:

### New IGT tests (2) ###

  * igt@dmabuf@all@dma_fence_chain:
    - Statuses : 41 pass(s)
    - Exec time: [7.48, 28.68] s

  * igt@dmabuf@all@dma_fence_proxy:
    - Statuses : 41 pass(s)
    - Exec time: [0.02, 0.09] s

  

Known issues
------------

  Here are the changes found in Patchwork_16777 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_flink_basic@flink-lifetime:
    - fi-tgl-y:           [PASS][3] -> [DMESG-WARN][4] ([CI#94] / [i915#402])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8043/fi-tgl-y/igt@gem_flink_basic@flink-lifetime.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/fi-tgl-y/igt@gem_flink_basic@flink-lifetime.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-tgl-y:           [FAIL][5] ([CI#94]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8043/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@i915_selftest@live@execlists:
    - fi-icl-y:           [DMESG-FAIL][7] ([fdo#108569]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8043/fi-icl-y/igt@i915_selftest@live@execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/fi-icl-y/igt@i915_selftest@live@execlists.html

  * igt@kms_addfb_basic@bad-pitch-999:
    - fi-tgl-y:           [DMESG-WARN][9] ([CI#94] / [i915#402]) -> [PASS][10] +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8043/fi-tgl-y/igt@kms_addfb_basic@bad-pitch-999.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/fi-tgl-y/igt@kms_addfb_basic@bad-pitch-999.html

  
#### Warnings ####

  * igt@runner@aborted:
    - fi-kbl-8809g:       [FAIL][11] ([i915#192] / [i915#193] / [i915#194]) -> [FAIL][12] ([i915#1209])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8043/fi-kbl-8809g/igt@runner@aborted.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/fi-kbl-8809g/igt@runner@aborted.html

  
  [CI#94]: https://gitlab.freedesktop.org/gfx-ci/i915-infra/issues/94
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [i915#1209]: https://gitlab.freedesktop.org/drm/intel/issues/1209
  [i915#192]: https://gitlab.freedesktop.org/drm/intel/issues/192
  [i915#193]: https://gitlab.freedesktop.org/drm/intel/issues/193
  [i915#194]: https://gitlab.freedesktop.org/drm/intel/issues/194
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402


Participating hosts (48 -> 42)
------------------------------

  Additional (4): fi-glk-dsi fi-kbl-7560u fi-byt-n2820 fi-elk-e7500 
  Missing    (10): fi-kbl-soraka fi-ilk-m540 fi-bsw-n3050 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bwr-2160 fi-ctg-p8600 fi-gdg-551 fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8043 -> Patchwork_16777

  CI-20190529: 20190529
  CI_DRM_8043: 7e5119254441cdf0764418bbf3f43f6547d30a8a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5483: 1707153df224ffb6333c6c660a792b7f334eb3d3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16777: 9d36cbcd1c369c53f5fef0e084387caee2e9f0ac @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9d36cbcd1c36 drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
0fe67463719f drm/i915/gt: Declare when we enabled timeslicing
a4595a6bbbfb drm/i915/gem: Allow combining submit-fences with syncobj
36dadb2f23d6 drm/i915/gem: Teach execbuf how to wait on future syncobj
e95e336bec8e drm/syncobj: Allow use of dma-fence-proxy
2f9c8ffce272 dma-buf: Proxy fence, an unsignaled fence placeholder
7a77040c1c21 dma-buf: Exercise dma-fence-chain under selftests
0c794e11ae51 dma-buf: Report signaled links inside dma-fence-chain
ec6462245001 dma-buf: Prettify typecasts for dma-fence-chain
304b7730d4f7 drm/i915/execlists: Reduce preempt-to-busy roundtrip delay
d92fc6dcd157 drm/i915/execlists: Check the sentinel is alone in the ELSP
44ec5f8e5c7c drm/i915/selftests: Add request throughput measurement to perf
c217c503b903 drm/i915/perf: Schedule oa_config after modifying the contexts
d9379944ec2d drm/i915: Extend i915_request_await_active to use all timelines
80532d70b271 drm/i915: Wrap i915_active in a simple kreffed struct
4bc4e5b8bf9b drm/i915/perf: Reintroduce wait on OA configuration completion
785f2f840a4b drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
abe716d0c7b1 drm/i915/gem: Extract transient execbuf flags from i915_vma
a9e51e63509e drm/i915: Drop inspection of execbuf flags during evict
273e8c952a87 drm/i915/gem: Check that the context wasn't closed during setup
efe30d6b961f drm/i915/gt: Prevent allocation on a banned context
15d2ef066485 drm/i915/gem: Consolidate ctx->engines[] release

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16777/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 14/22] dma-buf: Prettify typecasts for dma-fence-chain
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 14/22] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
@ 2020-03-02 16:11   ` Mika Kuoppala
  0 siblings, 0 replies; 37+ messages in thread
From: Mika Kuoppala @ 2020-03-02 16:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Inside dma-fence-chain, we use a cmpxchg on an RCU-protected pointer. To
> avoid the sparse warning for using the RCU pointer directly, we have to
> cast away the __rcu annotation. However, we don't need to use void*
> everywhere and can stick to the dma_fence*.

Prolly just my english but
s/everywhere/anywhere.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/dma-buf/dma-fence-chain.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> index 44a741677d25..3d123502ff12 100644
> --- a/drivers/dma-buf/dma-fence-chain.c
> +++ b/drivers/dma-buf/dma-fence-chain.c
> @@ -62,7 +62,8 @@ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>  			replacement = NULL;
>  		}
>  
> -		tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement);
> +		tmp = cmpxchg((struct dma_fence __force **)&chain->prev,
> +			      prev, replacement);
>  		if (tmp == prev)
>  			dma_fence_put(tmp);
>  		else
> -- 
> 2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct
  2020-03-02  8:57 ` [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct Chris Wilson
@ 2020-03-02 16:18   ` Mika Kuoppala
  2020-03-02 16:23     ` Chris Wilson
  0 siblings, 1 reply; 37+ messages in thread
From: Mika Kuoppala @ 2020-03-02 16:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> For conveniences of callers that just want to use an i915_active to
> track a wide array of concurrent timelines, wrap the base i915_active
> struct inside a kref. This i915_active will self-destruct after use.
>

It looks ok and I would buy this. However I didn't manage to
find a user.
-Mika


> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_active.c | 53 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_active.h |  4 +++
>  2 files changed, 57 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 7b3d6c12ad61..1826de14d2da 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -881,6 +881,59 @@ void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
>  	active_fence_cb(fence, cb);
>  }
>  
> +struct auto_active {
> +	struct i915_active base;
> +	struct kref ref;
> +};
> +
> +struct i915_active *i915_active_get(struct i915_active *ref)
> +{
> +	struct auto_active *aa = container_of(ref, typeof(*aa), base);
> +
> +	kref_get(&aa->ref);
> +	return &aa->base;
> +}
> +
> +static void auto_release(struct kref *ref)
> +{
> +	struct auto_active *aa = container_of(ref, typeof(*aa), ref);
> +
> +	i915_active_fini(&aa->base);
> +	kfree(aa);
> +}
> +
> +void i915_active_put(struct i915_active *ref)
> +{
> +	struct auto_active *aa = container_of(ref, typeof(*aa), base);
> +
> +	kref_put(&aa->ref, auto_release);
> +}
> +
> +static int auto_active(struct i915_active *ref)
> +{
> +	i915_active_get(ref);
> +	return 0;
> +}
> +
> +static void auto_retire(struct i915_active *ref)
> +{
> +	i915_active_put(ref);
> +}
> +
> +struct i915_active *i915_active_create(void)
> +{
> +	struct auto_active *aa;
> +
> +	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
> +	if (!aa)
> +		return NULL;
> +
> +	kref_init(&aa->ref);
> +	i915_active_init(&aa->base, auto_active, auto_retire);
> +
> +	return &aa->base;
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftests/i915_active.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 973ff0447c6c..7e438501333e 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -215,4 +215,8 @@ void i915_request_add_active_barriers(struct i915_request *rq);
>  void i915_active_print(struct i915_active *ref, struct drm_printer *m);
>  void i915_active_unlock_wait(struct i915_active *ref);
>  
> +struct i915_active *i915_active_create(void);
> +struct i915_active *i915_active_get(struct i915_active *ref);
> +void i915_active_put(struct i915_active *ref);
> +
>  #endif /* _I915_ACTIVE_H_ */
> -- 
> 2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct
  2020-03-02 16:18   ` Mika Kuoppala
@ 2020-03-02 16:23     ` Chris Wilson
  2020-03-02 16:34       ` Mika Kuoppala
  0 siblings, 1 reply; 37+ messages in thread
From: Chris Wilson @ 2020-03-02 16:23 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-03-02 16:18:48)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > For conveniences of callers that just want to use an i915_active to
> > track a wide array of concurrent timelines, wrap the base i915_active
> > struct inside a kref. This i915_active will self-destruct after use.
> >
> 
> It looks ok and I would buy this. However I didn't manage to
> find a user.

Did you look at the next patch?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct
  2020-03-02 16:23     ` Chris Wilson
@ 2020-03-02 16:34       ` Mika Kuoppala
  2020-03-02 16:42         ` Chris Wilson
  0 siblings, 1 reply; 37+ messages in thread
From: Mika Kuoppala @ 2020-03-02 16:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2020-03-02 16:18:48)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > For conveniences of callers that just want to use an i915_active to
>> > track a wide array of concurrent timelines, wrap the base i915_active
>> > struct inside a kref. This i915_active will self-destruct after use.
>> >
>> 
>> It looks ok and I would buy this. However I didn't manage to
>> find a user.
>
> Did you look at the next patch?

I did! I even searched through it.

I will take a second look.
-Mika

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct
  2020-03-02 16:34       ` Mika Kuoppala
@ 2020-03-02 16:42         ` Chris Wilson
  0 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02 16:42 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-03-02 16:34:17)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2020-03-02 16:18:48)
> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> 
> >> > For conveniences of callers that just want to use an i915_active to
> >> > track a wide array of concurrent timelines, wrap the base i915_active
> >> > struct inside a kref. This i915_active will self-destruct after use.
> >> >
> >> 
> >> It looks ok and I would buy this. However I didn't manage to
> >> find a user.
> >
> > Did you look at the next patch?
> 
> I did! I even searched through it.
> 
> I will take a second look.

The next-but-one patch then.

But patch 1/22 is the most important atm :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release
  2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
                   ` (23 preceding siblings ...)
  2020-03-02 14:24 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
@ 2020-03-02 18:29 ` Mika Kuoppala
  2020-03-02 21:43   ` Chris Wilson
  24 siblings, 1 reply; 37+ messages in thread
From: Mika Kuoppala @ 2020-03-02 18:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Use the same engine_idle_release() routine for cleaning all old
> ctx->engine[] state, closing any potential races with concurrent execbuf
> submission.
>
> v2ish: Use the ce->pin_count to close the execbuf gap.
>
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/1241
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 193 +++++++++---------
>  drivers/gpu/drm/i915/gem/i915_gem_context.h   |   1 -
>  .../gpu/drm/i915/gem/selftests/mock_context.c |   3 +
>  3 files changed, 105 insertions(+), 92 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index e525ead073f7..cb6b6be48978 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -242,7 +242,6 @@ static void __free_engines(struct i915_gem_engines *e, unsigned int count)
>  		if (!e->engines[count])
>  			continue;
>  
> -		RCU_INIT_POINTER(e->engines[count]->gem_context, NULL);
>  		intel_context_put(e->engines[count]);
>  	}
>  	kfree(e);
> @@ -255,7 +254,11 @@ static void free_engines(struct i915_gem_engines *e)
>  
>  static void free_engines_rcu(struct rcu_head *rcu)
>  {
> -	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
> +	struct i915_gem_engines *engines =
> +		container_of(rcu, struct i915_gem_engines, rcu);
> +
> +	i915_sw_fence_fini(&engines->fence);
> +	free_engines(engines);
>  }
>  
>  static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
> @@ -269,8 +272,6 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
>  	if (!e)
>  		return ERR_PTR(-ENOMEM);
>  
> -	e->ctx = ctx;
> -
>  	for_each_engine(engine, gt, id) {
>  		struct intel_context *ce;
>  
> @@ -304,7 +305,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>  	list_del(&ctx->link);
>  	spin_unlock(&ctx->i915->gem.contexts.lock);
>  
> -	free_engines(rcu_access_pointer(ctx->engines));
>  	mutex_destroy(&ctx->engines_mutex);
>  
>  	if (ctx->timeline)
> @@ -491,30 +491,104 @@ static void kill_engines(struct i915_gem_engines *engines)
>  static void kill_stale_engines(struct i915_gem_context *ctx)
>  {
>  	struct i915_gem_engines *pos, *next;
> -	unsigned long flags;
>  
> -	spin_lock_irqsave(&ctx->stale.lock, flags);
> +	spin_lock_irq(&ctx->stale.lock);

Reader ponders why it was with flags in the first place.

> +	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>  	list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
> -		if (!i915_sw_fence_await(&pos->fence))
> +		if (!i915_sw_fence_await(&pos->fence)) {
> +			list_del_init(&pos->link);

Is this for making the bed for kill path list_empty?

>  			continue;
> +		}
>  
> -		spin_unlock_irqrestore(&ctx->stale.lock, flags);
> +		spin_unlock_irq(&ctx->stale.lock);
>  
>  		kill_engines(pos);
>  
> -		spin_lock_irqsave(&ctx->stale.lock, flags);
> +		spin_lock_irq(&ctx->stale.lock);
> +		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
>  		list_safe_reset_next(pos, next, link);
>  		list_del_init(&pos->link); /* decouple from FENCE_COMPLETE */
>  
>  		i915_sw_fence_complete(&pos->fence);
>  	}
> -	spin_unlock_irqrestore(&ctx->stale.lock, flags);
> +	spin_unlock_irq(&ctx->stale.lock);
>  }
>  
>  static void kill_context(struct i915_gem_context *ctx)
>  {
>  	kill_stale_engines(ctx);
> -	kill_engines(__context_engines_static(ctx));
> +}
> +
> +static int engines_notify(struct i915_sw_fence *fence,
> +			  enum i915_sw_fence_notify state)
> +{
> +	struct i915_gem_engines *engines =
> +		container_of(fence, typeof(*engines), fence);
> +
> +	switch (state) {
> +	case FENCE_COMPLETE:
> +		if (!list_empty(&engines->link)) {
> +			struct i915_gem_context *ctx = engines->ctx;
> +			unsigned long flags;
> +
> +			spin_lock_irqsave(&ctx->stale.lock, flags);
> +			list_del(&engines->link);
> +			spin_unlock_irqrestore(&ctx->stale.lock, flags);
> +		}
> +		i915_gem_context_put(engines->ctx);
> +		break;
> +
> +	case FENCE_FREE:
> +		init_rcu_head(&engines->rcu);
> +		call_rcu(&engines->rcu, free_engines_rcu);
> +		break;
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +
> +static void engines_idle_release(struct i915_gem_context *ctx,
> +				 struct i915_gem_engines *engines)
> +{
> +	struct i915_gem_engines_iter it;
> +	struct intel_context *ce;
> +
> +	i915_sw_fence_init(&engines->fence, engines_notify);
> +	INIT_LIST_HEAD(&engines->link);
> +
> +	engines->ctx = i915_gem_context_get(ctx);
> +
> +	for_each_gem_engine(ce, engines, it) {
> +		struct dma_fence *fence;
> +		int err = 0;
> +
> +		/* serialises with execbuf */
> +		RCU_INIT_POINTER(ce->gem_context, NULL);
> +		if (!intel_context_pin_if_active(ce))
> +			continue;

Ok so this should ensure the lifetime past the execbufs.

> +
> +		fence = i915_active_fence_get(&ce->timeline->last_request);
> +		if (fence) {
> +			err = i915_sw_fence_await_dma_fence(&engines->fence,
> +							    fence, 0,
> +							    GFP_KERNEL);
> +			dma_fence_put(fence);
> +		}
> +		intel_context_unpin(ce);
> +		if (err < 0)
> +			goto kill;
> +	}
> +
> +	spin_lock_irq(&ctx->stale.lock);
> +	if (!i915_gem_context_is_closed(ctx))
> +		list_add_tail(&engines->link, &ctx->stale.engines);
> +	spin_unlock_irq(&ctx->stale.lock);
> +
> +kill:
> +	if (list_empty(&engines->link)) /* raced, already closed */
> +		kill_engines(engines);
> +
> +	i915_sw_fence_commit(&engines->fence);
>  }
>  
>  static void set_closed_name(struct i915_gem_context *ctx)
> @@ -538,11 +612,16 @@ static void context_close(struct i915_gem_context *ctx)
>  {
>  	struct i915_address_space *vm;
>  
> +	/* Flush any concurrent set_engines() */
> +	mutex_lock(&ctx->engines_mutex);
> +	engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
>  	i915_gem_context_set_closed(ctx);
> -	set_closed_name(ctx);
> +	mutex_unlock(&ctx->engines_mutex);
>  
>  	mutex_lock(&ctx->mutex);
>  
> +	set_closed_name(ctx);

Looks like a more natural spot for this.
-Mika

> +
>  	vm = i915_gem_context_vm(ctx);
>  	if (vm)
>  		i915_vm_close(vm);
> @@ -1626,77 +1705,6 @@ static const i915_user_extension_fn set_engines__extensions[] = {
>  	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
>  };
>  
> -static int engines_notify(struct i915_sw_fence *fence,
> -			  enum i915_sw_fence_notify state)
> -{
> -	struct i915_gem_engines *engines =
> -		container_of(fence, typeof(*engines), fence);
> -
> -	switch (state) {
> -	case FENCE_COMPLETE:
> -		if (!list_empty(&engines->link)) {
> -			struct i915_gem_context *ctx = engines->ctx;
> -			unsigned long flags;
> -
> -			spin_lock_irqsave(&ctx->stale.lock, flags);
> -			list_del(&engines->link);
> -			spin_unlock_irqrestore(&ctx->stale.lock, flags);
> -		}
> -		break;
> -
> -	case FENCE_FREE:
> -		init_rcu_head(&engines->rcu);
> -		call_rcu(&engines->rcu, free_engines_rcu);
> -		break;
> -	}
> -
> -	return NOTIFY_DONE;
> -}
> -
> -static void engines_idle_release(struct i915_gem_engines *engines)
> -{
> -	struct i915_gem_engines_iter it;
> -	struct intel_context *ce;
> -	unsigned long flags;
> -
> -	GEM_BUG_ON(!engines);
> -	i915_sw_fence_init(&engines->fence, engines_notify);
> -
> -	INIT_LIST_HEAD(&engines->link);
> -	spin_lock_irqsave(&engines->ctx->stale.lock, flags);
> -	if (!i915_gem_context_is_closed(engines->ctx))
> -		list_add(&engines->link, &engines->ctx->stale.engines);
> -	spin_unlock_irqrestore(&engines->ctx->stale.lock, flags);
> -	if (list_empty(&engines->link)) /* raced, already closed */
> -		goto kill;
> -
> -	for_each_gem_engine(ce, engines, it) {
> -		struct dma_fence *fence;
> -		int err;
> -
> -		if (!ce->timeline)
> -			continue;
> -
> -		fence = i915_active_fence_get(&ce->timeline->last_request);
> -		if (!fence)
> -			continue;
> -
> -		err = i915_sw_fence_await_dma_fence(&engines->fence,
> -						    fence, 0,
> -						    GFP_KERNEL);
> -
> -		dma_fence_put(fence);
> -		if (err < 0)
> -			goto kill;
> -	}
> -	goto out;
> -
> -kill:
> -	kill_engines(engines);
> -out:
> -	i915_sw_fence_commit(&engines->fence);
> -}
> -
>  static int
>  set_engines(struct i915_gem_context *ctx,
>  	    const struct drm_i915_gem_context_param *args)
> @@ -1739,8 +1747,6 @@ set_engines(struct i915_gem_context *ctx,
>  	if (!set.engines)
>  		return -ENOMEM;
>  
> -	set.engines->ctx = ctx;
> -
>  	for (n = 0; n < num_engines; n++) {
>  		struct i915_engine_class_instance ci;
>  		struct intel_engine_cs *engine;
> @@ -1793,6 +1799,11 @@ set_engines(struct i915_gem_context *ctx,
>  
>  replace:
>  	mutex_lock(&ctx->engines_mutex);
> +	if (i915_gem_context_is_closed(ctx)) {
> +		mutex_unlock(&ctx->engines_mutex);
> +		free_engines(set.engines);
> +		return -ENOENT;
> +	}
>  	if (args->size)
>  		i915_gem_context_set_user_engines(ctx);
>  	else
> @@ -1801,7 +1812,7 @@ set_engines(struct i915_gem_context *ctx,
>  	mutex_unlock(&ctx->engines_mutex);
>  
>  	/* Keep track of old engine sets for kill_context() */
> -	engines_idle_release(set.engines);
> +	engines_idle_release(ctx, set.engines);
>  
>  	return 0;
>  }
> @@ -2077,8 +2088,6 @@ static int clone_engines(struct i915_gem_context *dst,
>  	if (!clone)
>  		goto err_unlock;
>  
> -	clone->ctx = dst;
> -
>  	for (n = 0; n < e->num_engines; n++) {
>  		struct intel_engine_cs *engine;
>  
> @@ -2121,8 +2130,7 @@ static int clone_engines(struct i915_gem_context *dst,
>  	i915_gem_context_unlock_engines(src);
>  
>  	/* Serialised by constructor */
> -	free_engines(__context_engines_static(dst));
> -	RCU_INIT_POINTER(dst->engines, clone);
> +	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
>  	if (user_engines)
>  		i915_gem_context_set_user_engines(dst);
>  	else
> @@ -2553,6 +2561,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
>  	const struct i915_gem_engines *e = it->engines;
>  	struct intel_context *ctx;
>  
> +	if (unlikely(!e))
> +		return NULL;
> +
>  	do {
>  		if (it->idx >= e->num_engines)
>  			return NULL;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> index 3ae61a355d87..57b7ae2893e1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
> @@ -207,7 +207,6 @@ static inline void
>  i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
>  			   struct i915_gem_engines *engines)
>  {
> -	GEM_BUG_ON(!engines);
>  	it->engines = engines;
>  	it->idx = 0;
>  }
> diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> index b12ea1daa29d..e7e3c620f542 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> @@ -23,6 +23,9 @@ mock_context(struct drm_i915_private *i915,
>  	INIT_LIST_HEAD(&ctx->link);
>  	ctx->i915 = i915;
>  
> +	spin_lock_init(&ctx->stale.lock);
> +	INIT_LIST_HEAD(&ctx->stale.engines);
> +
>  	i915_gem_context_set_persistence(ctx);
>  
>  	mutex_init(&ctx->engines_mutex);
> -- 
> 2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release
  2020-03-02 18:29 ` [Intel-gfx] [PATCH 01/22] " Mika Kuoppala
@ 2020-03-02 21:43   ` Chris Wilson
  0 siblings, 0 replies; 37+ messages in thread
From: Chris Wilson @ 2020-03-02 21:43 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2020-03-02 18:29:09)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > @@ -491,30 +491,104 @@ static void kill_engines(struct i915_gem_engines *engines)
> >  static void kill_stale_engines(struct i915_gem_context *ctx)
> >  {
> >       struct i915_gem_engines *pos, *next;
> > -     unsigned long flags;
> >  
> > -     spin_lock_irqsave(&ctx->stale.lock, flags);
> > +     spin_lock_irq(&ctx->stale.lock);
> 
> Reader ponders why it was with flags in the first place.

Hindsight is 20/20.
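
For the record, the difference being relied on, as a minimal sketch (not
code from this series): spin_lock_irqsave() saves and restores whatever
interrupt state the caller arrived with, so it is safe even if interrupts
were already disabled; spin_lock_irq() unconditionally disables and then
re-enables interrupts, so it is only valid in process context with
interrupts known to be enabled, which appears to be the only way
kill_stale_engines() is reached.

	/* caller's interrupt state unknown: save and restore it */
	spin_lock_irqsave(&ctx->stale.lock, flags);
	/* ... */
	spin_unlock_irqrestore(&ctx->stale.lock, flags);

	/* caller known to have interrupts enabled: the simpler form suffices */
	spin_lock_irq(&ctx->stale.lock);
	/* ... */
	spin_unlock_irq(&ctx->stale.lock);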

> > +     GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
> >       list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
> > -             if (!i915_sw_fence_await(&pos->fence))
> > +             if (!i915_sw_fence_await(&pos->fence)) {
> > +                     list_del_init(&pos->link);
> 
> Is this for making the bed for kill path list_empty?

It's acknowledging that if we are racing in FENCE_COMPLETE, we don't
need to kill it.
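
In code-comment form (same hunk as quoted above, with that reasoning
spelled out; the comments are my paraphrase, not part of the patch):

	if (!i915_sw_fence_await(&pos->fence)) {
		/*
		 * The fence has already begun to complete, i.e. we are
		 * racing with engines_notify(FENCE_COMPLETE), which will
		 * (or already did) handle this entry. Nothing left to
		 * kill; just decouple it from the stale list while we
		 * still hold ctx->stale.lock, so the list_del() in the
		 * FENCE_COMPLETE path becomes a no-op.
		 */
		list_del_init(&pos->link);
		continue;
	}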

> > +static void engines_idle_release(struct i915_gem_context *ctx,
> > +                              struct i915_gem_engines *engines)
> > +{
> > +     struct i915_gem_engines_iter it;
> > +     struct intel_context *ce;
> > +
> > +     i915_sw_fence_init(&engines->fence, engines_notify);
> > +     INIT_LIST_HEAD(&engines->link);
> > +
> > +     engines->ctx = i915_gem_context_get(ctx);
> > +
> > +     for_each_gem_engine(ce, engines, it) {
> > +             struct dma_fence *fence;
> > +             int err = 0;
> > +
> > +             /* serialises with execbuf */
> > +             RCU_INIT_POINTER(ce->gem_context, NULL);
> > +             if (!intel_context_pin_if_active(ce))
> > +                     continue;
> 
> Ok so this should ensure the lifetime past the execbufs.
> 
> > +
> > +             fence = i915_active_fence_get(&ce->timeline->last_request);

Note it's this fence_get + ce->gem_context that combine to give us the
execbuf closure guard.
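
Annotated version of the same loop (my reading of it; the comments are
not from the patch):

	/* serialises with execbuf: later submissions see a closed context */
	RCU_INIT_POINTER(ce->gem_context, NULL);
	if (!intel_context_pin_if_active(ce))
		continue; /* unpinned: no unretired request to wait for */

	/* anything already in flight is captured by its last request... */
	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		/* ...which engines->fence must wait upon before teardown */
		err = i915_sw_fence_await_dma_fence(&engines->fence,
						    fence, 0, GFP_KERNEL);
		dma_fence_put(fence);
	}
	intel_context_unpin(ce);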

> > +             if (fence) {
> > +                     err = i915_sw_fence_await_dma_fence(&engines->fence,
> > +                                                         fence, 0,
> > +                                                         GFP_KERNEL);
> > +                     dma_fence_put(fence);
> > +             }
> > +             intel_context_unpin(ce);
> > +             if (err < 0)
> > +                     goto kill;
> > +     }
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 17/22] dma-buf: Proxy fence, an unsignaled fence placeholder
  2020-03-02  8:58 ` [Intel-gfx] [PATCH 17/22] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
@ 2020-03-03 16:24     ` kbuild test robot
  0 siblings, 0 replies; 37+ messages in thread
From: kbuild test robot @ 2020-03-03 16:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: clang-built-linux, intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1768 bytes --]

Hi Chris,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[cannot apply to drm-tip/drm-tip linus/master v5.6-rc4 next-20200303]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gem-Consolidate-ctx-engines-release/20200302-174042
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-allyesconfig (attached as .config)
compiler: clang version 11.0.0 (git://gitmirror/llvm_project 211fb91f1067ecdf7c0b8a019bcf76554d813129)
reproduce:
        # FIXME the reproduce steps for clang is not ready yet

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from drivers/dma-buf/dma-fence.c:22:
>> drivers/dma-buf/dma-fence-private.h:13:9: warning: 'DMA_FENCE_PRIVATE_H' is used as a header guard here, followed by #define of a different macro [-Wheader-guard]
   #ifndef DMA_FENCE_PRIVATE_H
           ^~~~~~~~~~~~~~~~~~~
   drivers/dma-buf/dma-fence-private.h:14:9: note: 'DMA_FENCE_PRIAVTE_H' is defined here; did you mean 'DMA_FENCE_PRIVATE_H'?
   #define DMA_FENCE_PRIAVTE_H
           ^~~~~~~~~~~~~~~~~~~
           DMA_FENCE_PRIVATE_H
   1 warning generated.

vim +/DMA_FENCE_PRIVATE_H +13 drivers/dma-buf/dma-fence-private.h

  > 13	#ifndef DMA_FENCE_PRIVATE_H
    14	#define DMA_FENCE_PRIAVTE_H
    15	
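
Presumably the fix is simply to spell the guard macro consistently, e.g.:

	#ifndef DMA_FENCE_PRIVATE_H
	#define DMA_FENCE_PRIVATE_H
	/* ... */
	#endif /* DMA_FENCE_PRIVATE_H */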

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 72283 bytes --]

[-- Attachment #3: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2020-03-03 16:25 UTC | newest]

Thread overview: 37+ messages
2020-03-02  8:57 [Intel-gfx] [PATCH 01/22] drm/i915/gem: Consolidate ctx->engines[] release Chris Wilson
2020-03-02  8:57 ` [Intel-gfx] [PATCH 02/22] drm/i915/gt: Prevent allocation on a banned context Chris Wilson
2020-03-02  8:57 ` [Intel-gfx] [PATCH 03/22] drm/i915/gem: Check that the context wasn't closed during setup Chris Wilson
2020-03-02  8:57 ` [Intel-gfx] [PATCH 04/22] drm/i915: Drop inspection of execbuf flags during evict Chris Wilson
2020-03-02  8:57 ` [Intel-gfx] [PATCH 05/22] drm/i915/gem: Extract transient execbuf flags from i915_vma Chris Wilson
2020-03-02  8:57 ` [Intel-gfx] [PATCH 06/22] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl Chris Wilson
2020-03-02  9:39   ` Maarten Lankhorst
2020-03-02  8:57 ` [Intel-gfx] [PATCH 07/22] drm/i915/perf: Reintroduce wait on OA configuration completion Chris Wilson
2020-03-02 10:29   ` Lionel Landwerlin
2020-03-02  8:57 ` [Intel-gfx] [PATCH 08/22] drm/i915: Wrap i915_active in a simple kreffed struct Chris Wilson
2020-03-02 16:18   ` Mika Kuoppala
2020-03-02 16:23     ` Chris Wilson
2020-03-02 16:34       ` Mika Kuoppala
2020-03-02 16:42         ` Chris Wilson
2020-03-02  8:57 ` [Intel-gfx] [PATCH 09/22] drm/i915: Extend i915_request_await_active to use all timelines Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 10/22] drm/i915/perf: Schedule oa_config after modifying the contexts Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 11/22] drm/i915/selftests: Add request throughput measurement to perf Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 12/22] drm/i915/execlists: Check the sentinel is alone in the ELSP Chris Wilson
2020-03-02 14:04   ` Mika Kuoppala
2020-03-02  8:58 ` [Intel-gfx] [PATCH 13/22] drm/i915/execlists: Reduce preempt-to-busy roundtrip delay Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 14/22] dma-buf: Prettify typecasts for dma-fence-chain Chris Wilson
2020-03-02 16:11   ` Mika Kuoppala
2020-03-02  8:58 ` [Intel-gfx] [PATCH 15/22] dma-buf: Report signaled links inside dma-fence-chain Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 16/22] dma-buf: Exercise dma-fence-chain under selftests Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 17/22] dma-buf: Proxy fence, an unsignaled fence placeholder Chris Wilson
2020-03-03 16:24   ` kbuild test robot
2020-03-02  8:58 ` [Intel-gfx] [PATCH 18/22] drm/syncobj: Allow use of dma-fence-proxy Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 19/22] drm/i915/gem: Teach execbuf how to wait on future syncobj Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 20/22] drm/i915/gem: Allow combining submit-fences with syncobj Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 21/22] drm/i915/gt: Declare when we enabled timeslicing Chris Wilson
2020-03-02  8:58 ` [Intel-gfx] [PATCH 22/22] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-03-02 13:58 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/22] drm/i915/gem: Consolidate ctx->engines[] release Patchwork
2020-03-02 14:18 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-03-02 14:24 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-03-02 18:29 ` [Intel-gfx] [PATCH 01/22] " Mika Kuoppala
2020-03-02 21:43   ` Chris Wilson
