All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
@ 2018-06-29 22:54 Chris Wilson
  2018-06-29 22:54 ` [PATCH 2/6] drm/i915: Export i915_request_skip() Chris Wilson
                   ` (15 more replies)
  0 siblings, 16 replies; 40+ messages in thread
From: Chris Wilson @ 2018-06-29 22:54 UTC (permalink / raw)
  To: intel-gfx

Currently all callers are responsible for adding the vma to the active
timeline and then exporting its fence. Combine the two operations into
i915_vma_move_to_active() to move all the extra handling from the
callers to the single site.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 47 +++++++++----------
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  4 --
 .../drm/i915/selftests/i915_gem_coherency.c   |  4 --
 .../gpu/drm/i915/selftests/i915_gem_context.c |  4 --
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  4 --
 5 files changed, 21 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c2dd9b4cdace..91f20445147f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1166,15 +1166,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	i915_vma_move_to_active(batch, rq, 0);
-	reservation_object_lock(batch->resv, NULL);
-	reservation_object_add_excl_fence(batch->resv, &rq->fence);
-	reservation_object_unlock(batch->resv);
 	i915_vma_unpin(batch);
 
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-	reservation_object_lock(vma->resv, NULL);
-	reservation_object_add_excl_fence(vma->resv, &rq->fence);
-	reservation_object_unlock(vma->resv);
 
 	rq->batch = batch;
 
@@ -1771,25 +1765,6 @@ static int eb_relocate(struct i915_execbuffer *eb)
 	return eb_relocate_slow(eb);
 }
 
-static void eb_export_fence(struct i915_vma *vma,
-			    struct i915_request *rq,
-			    unsigned int flags)
-{
-	struct reservation_object *resv = vma->resv;
-
-	/*
-	 * Ignore errors from failing to allocate the new fence, we can't
-	 * handle an error right now. Worst case should be missed
-	 * synchronisation leading to rendering corruption.
-	 */
-	reservation_object_lock(resv, NULL);
-	if (flags & EXEC_OBJECT_WRITE)
-		reservation_object_add_excl_fence(resv, &rq->fence);
-	else if (reservation_object_reserve_shared(resv) == 0)
-		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
-}
-
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
@@ -1844,7 +1819,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		struct i915_vma *vma = eb->vma[i];
 
 		i915_vma_move_to_active(vma, eb->request, flags);
-		eb_export_fence(vma, eb->request, flags);
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1884,6 +1858,25 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 	return true;
 }
 
+static void export_fence(struct i915_vma *vma,
+			 struct i915_request *rq,
+			 unsigned int flags)
+{
+	struct reservation_object *resv = vma->resv;
+
+	/*
+	 * Ignore errors from failing to allocate the new fence, we can't
+	 * handle an error right now. Worst case should be missed
+	 * synchronisation leading to rendering corruption.
+	 */
+	reservation_object_lock(resv, NULL);
+	if (flags & EXEC_OBJECT_WRITE)
+		reservation_object_add_excl_fence(resv, &rq->fence);
+	else if (reservation_object_reserve_shared(resv) == 0)
+		reservation_object_add_shared_fence(resv, &rq->fence);
+	reservation_object_unlock(resv);
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct i915_request *rq,
 			     unsigned int flags)
@@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_gem_active_set(&vma->last_fence, rq);
+
+	export_fence(vma, rq, flags);
 }
 
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index b5e87fcdcdae..358fc81f6c99 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -998,10 +998,6 @@ static int gpu_write(struct i915_vma *vma,
 
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
-	reservation_object_lock(vma->resv, NULL);
-	reservation_object_add_excl_fence(vma->resv, &rq->fence);
-	reservation_object_unlock(vma->resv);
-
 err_request:
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index a4900091ae3d..11427aae0853 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -225,10 +225,6 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
-
 	i915_request_add(rq);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index cc848ceeb3c3..81ed87aa0a4d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -178,10 +178,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
-
 	i915_request_add(rq);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 77dd7a510ea6..fa5a0654314a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -456,10 +456,6 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 
 	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
-	reservation_object_lock(vma->resv, NULL);
-	reservation_object_add_excl_fence(vma->resv, &rq->fence);
-	reservation_object_unlock(vma->resv);
-
 	i915_request_add(rq);
 
 	i915_gem_object_set_active_reference(obj);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/6] drm/i915: Export i915_request_skip()
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
@ 2018-06-29 22:54 ` Chris Wilson
  2018-07-02 11:37   ` Tvrtko Ursulin
  2018-06-29 22:54 ` [PATCH 3/6] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-06-29 22:54 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to start skipping requests on failing to
complete their payloads. So export the utility function currently used to
make requests inoperable following a failed gpu reset.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c     | 25 +++----------------------
 drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.h |  2 ++
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8954db6ab083..2ff6251483ce 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3074,25 +3074,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 	return err;
 }
 
-static void skip_request(struct i915_request *request)
-{
-	void *vaddr = request->ring->vaddr;
-	u32 head;
-
-	/* As this request likely depends on state from the lost
-	 * context, clear out all the user operations leaving the
-	 * breadcrumb at the end (so we get the fence notifications).
-	 */
-	head = request->head;
-	if (request->postfix < head) {
-		memset(vaddr + head, 0, request->ring->size - head);
-		head = 0;
-	}
-	memset(vaddr + head, 0, request->postfix - head);
-
-	dma_fence_set_error(&request->fence, -EIO);
-}
-
 static void engine_skip_context(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -3107,10 +3088,10 @@ static void engine_skip_context(struct i915_request *request)
 
 	list_for_each_entry_continue(request, &engine->timeline.requests, link)
 		if (request->gem_context == hung_ctx)
-			skip_request(request);
+			i915_request_skip(request, -EIO);
 
 	list_for_each_entry(request, &timeline->requests, link)
-		skip_request(request);
+		i915_request_skip(request, -EIO);
 
 	spin_unlock(&timeline->lock);
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -3153,7 +3134,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
 
 	if (stalled) {
 		i915_gem_context_mark_guilty(request->gem_context);
-		skip_request(request);
+		i915_request_skip(request, -EIO);
 
 		/* If this context is now banned, skip all pending requests. */
 		if (i915_gem_context_is_banned(request->gem_context))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a2f7e9358450..7ae08b68121e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1013,6 +1013,27 @@ i915_request_await_object(struct i915_request *to,
 	return ret;
 }
 
+void i915_request_skip(struct i915_request *rq, int error)
+{
+	void *vaddr = rq->ring->vaddr;
+	u32 head;
+
+	GEM_BUG_ON(!IS_ERR_VALUE((long)error));
+	dma_fence_set_error(&rq->fence, error);
+
+	/*
+	 * As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = rq->infix;
+	if (rq->postfix < head) {
+		memset(vaddr + head, 0, rq->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, rq->postfix - head);
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7ee220ded9c9..a355a081485f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq);
 void __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
+void i915_request_skip(struct i915_request *request, int error);
+
 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
 
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/6] drm/i915: Start returning an error from i915_vma_move_to_active()
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
  2018-06-29 22:54 ` [PATCH 2/6] drm/i915: Export i915_request_skip() Chris Wilson
@ 2018-06-29 22:54 ` Chris Wilson
  2018-07-02 11:41   ` Tvrtko Ursulin
  2018-06-29 22:54 ` [PATCH 4/6] drm/i915: Move i915_vma_move_to_active() to i915_vma.c Chris Wilson
                   ` (13 subsequent siblings)
  15 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-06-29 22:54 UTC (permalink / raw)
  To: intel-gfx

Handling such a late error in request construction is tricky, but to
accommodate future patches which may allocate here, we potentially could
err. To handle the error after already adjusting global state to track
the new request, we must finish and submit the request. But we don't
want to use the request as not everything is being tracked by it, so we
opt to cancel the commands inside the request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gvt/scheduler.c          |  6 ++++-
 drivers/gpu/drm/i915/i915_drv.h               |  6 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 25 +++++++++++++------
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |  9 +++++--
 .../drm/i915/selftests/i915_gem_coherency.c   |  4 +--
 .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++++--
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  7 +++---
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 ++++--
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 11 ++++++--
 drivers/gpu/drm/i915/selftests/intel_lrc.c    | 11 ++++++--
 .../drm/i915/selftests/intel_workarounds.c    |  5 +++-
 12 files changed, 78 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 928818f218f7..b0e566956b8d 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -476,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			i915_gem_obj_finish_shmem_access(bb->obj);
 			bb->accessing = false;
 
-			i915_vma_move_to_active(bb->vma, workload->req, 0);
+			ret = i915_vma_move_to_active(bb->vma,
+						      workload->req,
+						      0);
+			if (ret)
+				goto err;
 		}
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ce7d06332884..cd8f69a00e86 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3099,9 +3099,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct i915_request *rq,
-			     unsigned int flags);
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 91f20445147f..97136e4ce91d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1165,12 +1165,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_request;
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
-	i915_vma_move_to_active(batch, rq, 0);
-	i915_vma_unpin(batch);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
 
 	rq->batch = batch;
+	i915_vma_unpin(batch);
 
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
@@ -1179,6 +1183,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	/* Return with batch mapping (cmd) still pinned */
 	return 0;
 
+skip_request:
+	i915_request_skip(rq, err);
 err_request:
 	i915_request_add(rq);
 err_unpin:
@@ -1818,7 +1824,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 
-		i915_vma_move_to_active(vma, eb->request, flags);
+		err = i915_vma_move_to_active(vma, eb->request, flags);
+		if (unlikely(err)) {
+			i915_request_skip(eb->request, err);
+			return err;
+		}
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1877,9 +1887,9 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct i915_request *rq,
-			     unsigned int flags)
+int i915_vma_move_to_active(struct i915_vma *vma,
+			    struct i915_request *rq,
+			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 	const unsigned int idx = rq->engine->id;
@@ -1916,6 +1926,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		i915_gem_active_set(&vma->last_fence, rq);
 
 	export_fence(vma, rq, flags);
+	return 0;
 }
 
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 3210cedfa46c..90baf9086d0a 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 			goto err_unpin;
 	}
 
-	i915_vma_move_to_active(so.vma, rq, 0);
+	err = i915_vma_move_to_active(so.vma, rq, 0);
 err_unpin:
 	i915_vma_unpin(so.vma);
 err_vma:
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 358fc81f6c99..d33e20940e0a 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_request;
 	}
 
-	i915_vma_move_to_active(batch, rq, 0);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto err_request;
+
 	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
@@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		goto err_request;
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		i915_request_skip(rq, err);
 
 err_request:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index 11427aae0853..328585459c67 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
 
-	return 0;
+	return err;
 }
 
 static bool always_valid(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 81ed87aa0a4d..da43f0a99eb2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -170,18 +170,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
-	i915_vma_move_to_active(batch, rq, 0);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	if (err)
+		goto skip_request;
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto skip_request;
+
 	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
 
 	return 0;
 
+skip_request:
+	i915_request_skip(rq, err);
 err_request:
 	i915_request_add(rq);
 err_batch:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index fa5a0654314a..7d0ddef1519b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -454,13 +454,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		return PTR_ERR(rq);
 	}
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 
 	i915_request_add(rq);
 
-	i915_gem_object_set_active_reference(obj);
+	__i915_gem_object_release_unless_active(obj);
 	i915_vma_unpin(vma);
-	return 0;
+
+	return err;
 }
 
 static bool assert_mmap_offset(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 521ae4a90ddf..c27f77a24ce0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -678,7 +678,9 @@ static int live_all_engines(void *arg)
 			i915_gem_object_set_active_reference(batch->obj);
 		}
 
-		i915_vma_move_to_active(batch, request[id], 0);
+		err = i915_vma_move_to_active(batch, request[id], 0);
+		GEM_BUG_ON(err);
+
 		i915_request_get(request[id]);
 		i915_request_add(request[id]);
 	}
@@ -788,7 +790,9 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
-		i915_vma_move_to_active(batch, request[id], 0);
+		err = i915_vma_move_to_active(batch, request[id], 0);
+		GEM_BUG_ON(err);
+
 		i915_gem_object_set_active_reference(batch->obj);
 		i915_vma_get(batch);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index fe7d3190ebfe..e11df2743704 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -130,13 +130,19 @@ static int emit_recurse_batch(struct hang *h,
 	if (err)
 		goto unpin_vma;
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(vma->obj)) {
 		i915_gem_object_get(vma->obj);
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	i915_vma_move_to_active(hws, rq, 0);
+	err = i915_vma_move_to_active(hws, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(hws->obj)) {
 		i915_gem_object_get(hws->obj);
 		i915_gem_object_set_active_reference(hws->obj);
@@ -205,6 +211,7 @@ static int emit_recurse_batch(struct hang *h,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
 
+unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index ea27c7cfbf96..b34f7ac6631e 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -104,13 +104,19 @@ static int emit_recurse_batch(struct spinner *spin,
 	if (err)
 		goto unpin_vma;
 
-	i915_vma_move_to_active(vma, rq, 0);
+	err = i915_vma_move_to_active(vma, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(vma->obj)) {
 		i915_gem_object_get(vma->obj);
 		i915_gem_object_set_active_reference(vma->obj);
 	}
 
-	i915_vma_move_to_active(hws, rq, 0);
+	err = i915_vma_move_to_active(hws, rq, 0);
+	if (err)
+		goto unpin_hws;
+
 	if (!i915_gem_object_has_active_reference(hws->obj)) {
 		i915_gem_object_get(hws->obj);
 		i915_gem_object_set_active_reference(hws->obj);
@@ -134,6 +140,7 @@ static int emit_recurse_batch(struct spinner *spin,
 
 	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
 
+unpin_hws:
 	i915_vma_unpin(hws);
 unpin_vma:
 	i915_vma_unpin(vma);
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index e1ea2d2bedd2..c100153cb494 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -49,6 +49,10 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 		goto err_pin;
 	}
 
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (err)
+		goto err_req;
+
 	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
 	if (INTEL_GEN(ctx->i915) >= 8)
 		srm++;
@@ -67,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
 	reservation_object_lock(vma->resv, NULL);
 	reservation_object_add_excl_fence(vma->resv, &rq->fence);
 	reservation_object_unlock(vma->resv);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 4/6] drm/i915: Move i915_vma_move_to_active() to i915_vma.c
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
  2018-06-29 22:54 ` [PATCH 2/6] drm/i915: Export i915_request_skip() Chris Wilson
  2018-06-29 22:54 ` [PATCH 3/6] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
@ 2018-06-29 22:54 ` Chris Wilson
  2018-07-02 11:41   ` Tvrtko Ursulin
  2018-06-29 22:54 ` [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
                   ` (12 subsequent siblings)
  15 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-06-29 22:54 UTC (permalink / raw)
  To: intel-gfx

i915_vma_move_to_active() has grown beyond its execbuf origins, and
should take its rightful place in i915_vma.c as a method for i915_vma!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |  3 --
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ----------------------
 drivers/gpu/drm/i915/i915_vma.c            | 61 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_vma.h            |  4 ++
 4 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cd8f69a00e86..1c04872890d4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3099,9 +3099,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 97136e4ce91d..3f0c612d42e7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1868,67 +1868,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 	return true;
 }
 
-static void export_fence(struct i915_vma *vma,
-			 struct i915_request *rq,
-			 unsigned int flags)
-{
-	struct reservation_object *resv = vma->resv;
-
-	/*
-	 * Ignore errors from failing to allocate the new fence, we can't
-	 * handle an error right now. Worst case should be missed
-	 * synchronisation leading to rendering corruption.
-	 */
-	reservation_object_lock(resv, NULL);
-	if (flags & EXEC_OBJECT_WRITE)
-		reservation_object_add_excl_fence(resv, &rq->fence);
-	else if (reservation_object_reserve_shared(resv) == 0)
-		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
-}
-
-int i915_vma_move_to_active(struct i915_vma *vma,
-			    struct i915_request *rq,
-			    unsigned int flags)
-{
-	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned int idx = rq->engine->id;
-
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
-	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-
-	/*
-	 * Add a reference if we're newly entering the active list.
-	 * The order in which we add operations to the retirement queue is
-	 * vital here: mark_active adds to the start of the callback list,
-	 * such that subsequent callbacks are called first. Therefore we
-	 * add the active reference first and queue for it to be dropped
-	 * *last*.
-	 */
-	if (!i915_vma_is_active(vma))
-		obj->active_count++;
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], rq);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
-
-	obj->write_domain = 0;
-	if (flags & EXEC_OBJECT_WRITE) {
-		obj->write_domain = I915_GEM_DOMAIN_RENDER;
-
-		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
-			i915_gem_active_set(&obj->frontbuffer_write, rq);
-
-		obj->read_domains = 0;
-	}
-	obj->read_domains |= I915_GEM_GPU_DOMAINS;
-
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		i915_gem_active_set(&vma->last_fence, rq);
-
-	export_fence(vma, rq, flags);
-	return 0;
-}
-
 static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 {
 	u32 *cs;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d0e606e9b27a..7635c27e7e8b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -859,6 +859,67 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 		list_del(&vma->obj->userfault_link);
 }
 
+static void export_fence(struct i915_vma *vma,
+			 struct i915_request *rq,
+			 unsigned int flags)
+{
+	struct reservation_object *resv = vma->resv;
+
+	/*
+	 * Ignore errors from failing to allocate the new fence, we can't
+	 * handle an error right now. Worst case should be missed
+	 * synchronisation leading to rendering corruption.
+	 */
+	reservation_object_lock(resv, NULL);
+	if (flags & EXEC_OBJECT_WRITE)
+		reservation_object_add_excl_fence(resv, &rq->fence);
+	else if (reservation_object_reserve_shared(resv) == 0)
+		reservation_object_add_shared_fence(resv, &rq->fence);
+	reservation_object_unlock(resv);
+}
+
+int i915_vma_move_to_active(struct i915_vma *vma,
+			    struct i915_request *rq,
+			    unsigned int flags)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	const unsigned int idx = rq->engine->id;
+
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+	/*
+	 * Add a reference if we're newly entering the active list.
+	 * The order in which we add operations to the retirement queue is
+	 * vital here: mark_active adds to the start of the callback list,
+	 * such that subsequent callbacks are called first. Therefore we
+	 * add the active reference first and queue for it to be dropped
+	 * *last*.
+	 */
+	if (!i915_vma_is_active(vma))
+		obj->active_count++;
+	i915_vma_set_active(vma, idx);
+	i915_gem_active_set(&vma->last_read[idx], rq);
+	list_move_tail(&vma->vm_link, &vma->vm->active_list);
+
+	obj->write_domain = 0;
+	if (flags & EXEC_OBJECT_WRITE) {
+		obj->write_domain = I915_GEM_DOMAIN_RENDER;
+
+		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
+			i915_gem_active_set(&obj->frontbuffer_write, rq);
+
+		obj->read_domains = 0;
+	}
+	obj->read_domains |= I915_GEM_GPU_DOMAINS;
+
+	if (flags & EXEC_OBJECT_NEEDS_FENCE)
+		i915_gem_active_set(&vma->last_fence, rq);
+
+	export_fence(vma, rq, flags);
+	return 0;
+}
+
 int i915_vma_unbind(struct i915_vma *vma)
 {
 	unsigned long active;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 66a228931517..a218b689e418 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -215,6 +215,10 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
 	return vma->active & BIT(engine);
 }
 
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (2 preceding siblings ...)
  2018-06-29 22:54 ` [PATCH 4/6] drm/i915: Move i915_vma_move_to_active() to i915_vma.c Chris Wilson
@ 2018-06-29 22:54 ` Chris Wilson
  2018-07-03 17:28   ` Tvrtko Ursulin
  2018-07-04  9:13   ` [PATCH v3] " Chris Wilson
  2018-06-29 22:54 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
                   ` (11 subsequent siblings)
  15 siblings, 2 replies; 40+ messages in thread
From: Chris Wilson @ 2018-06-29 22:54 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to be able to use more flexible request
timelines that can hop between engines. From the vma's point of view, we
can then no longer rely on the binding of this request to an engine, and
so cannot ensure that different requests are ordered through a per-engine
timeline; we must instead track activity across all timelines. (We track
activity on the vma itself to prevent unbinding from HW before the HW
has finished accessing it.)

v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
index is fraught with aliasing of unsigned longs).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
 drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
 6 files changed, 98 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6aa761ca085..50fcf74248f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
-	int i;
 
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(size > ggtt->vm.total);
@@ -2005,8 +2004,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], NULL);
 	init_request_active(&vma->last_fence, NULL);
 
 	vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df524c9cad40..8c81cf3aa182 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	int i;
-
 	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
+		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
 			   upper_32_bits(err->gtt_offset),
 			   lower_32_bits(err->gtt_offset),
 			   err->size,
 			   err->read_domains,
-			   err->write_domain);
-		for (i = 0; i < I915_NUM_ENGINES; i++)
-			err_printf(m, "%02x ", err->rseqno[i]);
-
-		err_printf(m, "] %02x", err->wseqno);
+			   err->write_domain,
+			   err->wseqno);
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
@@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	int i;
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
 	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
 	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 58910f1dc67c..f893a4e8b783 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -177,7 +177,7 @@ struct i915_gpu_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 rseqno[I915_NUM_ENGINES], wseqno;
+		u32 wseqno;
 		u64 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a355a081485f..e1c9365dfefb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -380,6 +380,7 @@ static inline void
 init_request_active(struct i915_gem_active *active,
 		    i915_gem_retire_fn retire)
 {
+	RCU_INIT_POINTER(active->request, NULL);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_gem_retire_noop;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 7635c27e7e8b..2faad2a1d00e 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
+struct i915_vma_active {
+	struct i915_gem_active base;
+	struct i915_vma *vma;
+	struct rb_node node;
+	u64 timeline;
+};
+
 static void
-i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
+__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 {
-	const unsigned int idx = rq->engine->id;
-	struct i915_vma *vma =
-		container_of(active, struct i915_vma, last_read[idx]);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
-	i915_vma_clear_active(vma, idx);
-	if (i915_vma_is_active(vma))
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	if (--vma->active_count)
 		return;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
 	}
 }
 
+static void
+i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	struct i915_vma_active *active =
+		container_of(base, typeof(*active), base);
+
+	__i915_vma_retire(active->vma, rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	struct rb_node *rb, **p;
-	int i;
 
 	/* The aliasing_ppgtt should never be used directly! */
 	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
@@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], i915_vma_retire);
+	vma->active = RB_ROOT;
+
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	int i;
+	struct i915_vma_active *iter, *n;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
 	list_del(&vma->obj_link);
@@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
+	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
+		kfree(iter);
+	}
+
 	kmem_cache_free(i915->vmas, vma);
 }
 
@@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
+static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
+{
+	struct i915_vma_active *active;
+	struct rb_node **p, *parent;
+
+	parent = NULL;
+	p = &vma->active.rb_node;
+	while (*p) {
+		parent = *p;
+
+		active = rb_entry(parent, struct i915_vma_active, node);
+		if (active->timeline == idx)
+			return &active->base;
+
+		if (active->timeline < idx)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	active = kmalloc(sizeof(*active), GFP_KERNEL);
+	if (unlikely(!active))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&active->base, i915_vma_retire);
+	active->vma = vma;
+	active->timeline = idx;
+
+	rb_link_node(&active->node, parent, p);
+	rb_insert_color(&active->node, &vma->active);
+
+	return &active->base;
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned int idx = rq->engine->id;
+	struct i915_gem_active *active;
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	active = lookup_active(vma, rq->fence.context);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
 	/*
 	 * Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
@@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_vma_is_active(vma))
+	if (!i915_gem_active_isset(active) && !vma->active_count++) {
+		list_move_tail(&vma->vm_link, &vma->vm->active_list);
 		obj->active_count++;
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], rq);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
+	}
+	i915_gem_active_set(active, rq);
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	GEM_BUG_ON(!obj->active_count);
 
 	obj->write_domain = 0;
 	if (flags & EXEC_OBJECT_WRITE) {
@@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 
 int i915_vma_unbind(struct i915_vma *vma)
 {
-	unsigned long active;
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
 	might_sleep();
-	active = i915_vma_get_active(vma);
-	if (active) {
-		int idx;
+	if (i915_vma_is_active(vma)) {
+		struct i915_vma_active *active, *n;
 
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
@@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		for_each_active(active, idx) {
-			ret = i915_gem_active_retire(&vma->last_read[idx],
+		rbtree_postorder_for_each_entry_safe(active, n,
+						     &vma->active, node) {
+			ret = i915_gem_active_retire(&active->base,
 						     &vma->vm->i915->drm.struct_mutex);
 			if (ret)
-				break;
-		}
-
-		if (!ret) {
-			ret = i915_gem_active_retire(&vma->last_fence,
-						     &vma->vm->i915->drm.struct_mutex);
+				goto unpin;
 		}
 
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->vm->i915->drm.struct_mutex);
+unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index a218b689e418..c297b0a0dc47 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@
 #define __I915_VMA_H__
 
 #include <linux/io-mapping.h>
+#include <linux/rbtree.h>
 
 #include <drm/drm_mm.h>
 
@@ -94,8 +95,8 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(12)
 
-	unsigned int active;
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	unsigned int active_count;
+	struct rb_root active;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 
 void i915_vma_unpin_and_release(struct i915_vma **p_vma);
 
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+	return vma->active_count;
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_GGTT;
@@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
-	return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
-	return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
-				       unsigned int engine)
-{
-	vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
-					 unsigned int engine)
-{
-	vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
-					      unsigned int engine)
-{
-	return vma->active & BIT(engine);
-}
-
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
-
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (3 preceding siblings ...)
  2018-06-29 22:54 ` [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
@ 2018-06-29 22:54 ` Chris Wilson
  2018-07-03 17:40   ` Tvrtko Ursulin
  2018-07-04  8:34   ` [PATCH v2] " Chris Wilson
  2018-06-29 23:04 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Patchwork
                   ` (10 subsequent siblings)
  15 siblings, 2 replies; 40+ messages in thread
From: Chris Wilson @ 2018-06-29 22:54 UTC (permalink / raw)
  To: intel-gfx

Using a VMA on more than one timeline concurrently is the exception
rather than the rule (using it concurrently on multiple engines). As we
expect to only use one active tracker, store the most recently used
tracker inside the i915_vma itself and only fall back to the rbtree if
we need two or more concurrent active trackers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_vma.c | 36 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 2faad2a1d00e..9b69d24c5cf1 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(active->vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	vma->active = RB_ROOT;
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -895,6 +902,15 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
 	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
 
 	parent = NULL;
 	p = &vma->active.rb_node;
@@ -903,7 +919,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 
 		active = rb_entry(parent, struct i915_vma_active, node);
 		if (active->timeline == idx)
-			return &active->base;
+			goto replace;
 
 		if (active->timeline < idx)
 			p = &parent->rb_right;
@@ -922,7 +938,18 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 	rb_link_node(&active->node, parent, p);
 	rb_insert_color(&active->node, &vma->active);
 
-	return &active->base;
+replace:
+	if (i915_gem_active_isset(&active->base)) {
+		__list_del_entry(&active->base.link);
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1002,6 +1029,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rbtree_postorder_for_each_entry_safe(active, n,
 						     &vma->active, node) {
 			ret = i915_gem_active_retire(&active->base,
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c297b0a0dc47..f06d66377107 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@ struct i915_vma {
 
 	unsigned int active_count;
 	struct rb_root active;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (4 preceding siblings ...)
  2018-06-29 22:54 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
@ 2018-06-29 23:04 ` Patchwork
  2018-06-29 23:19 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-06-29 23:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
URL   : https://patchwork.freedesktop.org/series/45689/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Refactor export_fence() after i915_vma_move_to_active()
Okay!

Commit: drm/i915: Export i915_request_skip()
Okay!

Commit: drm/i915: Start returning an error from i915_vma_move_to_active()
Okay!

Commit: drm/i915: Move i915_vma_move_to_active() to i915_vma.c
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3667:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)

Commit: drm/i915: Track vma activity per fence.context, not per engine
Okay!

Commit: drm/i915: Track the last-active inside the i915_vma
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (5 preceding siblings ...)
  2018-06-29 23:04 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Patchwork
@ 2018-06-29 23:19 ` Patchwork
  2018-06-30  3:03 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-06-29 23:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
URL   : https://patchwork.freedesktop.org/series/45689/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4409 -> Patchwork_9489 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/45689/revisions/1/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9489 that come from known issues:

  === IGT changes ===

    ==== Possible fixes ====

    igt@kms_frontbuffer_tracking@basic:
      fi-hsw-peppy:       DMESG-FAIL (fdo#106103, fdo#102614) -> PASS

    
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#106103 https://bugs.freedesktop.org/show_bug.cgi?id=106103


== Participating hosts (45 -> 40) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4409 -> Patchwork_9489

  CI_DRM_4409: bf99024d9c80d81968d3621ead0c0c05343fe826 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4532: 840d12e2f050b784552197403d6575a57b6e896d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9489: 9c4a8cd83dff530c64eeffd8444ab89fa6221025 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9c4a8cd83dff drm/i915: Track the last-active inside the i915_vma
2f5fde33d6a3 drm/i915: Track vma activity per fence.context, not per engine
962c9df7bb3c drm/i915: Move i915_vma_move_to_active() to i915_vma.c
8d7c654cc93c drm/i915: Start returning an error from i915_vma_move_to_active()
2f5a8e495d87 drm/i915: Export i915_request_skip()
607a75192843 drm/i915: Refactor export_fence() after i915_vma_move_to_active()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9489/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (6 preceding siblings ...)
  2018-06-29 23:19 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-06-30  3:03 ` Patchwork
  2018-07-02 11:34 ` [PATCH 1/6] " Tvrtko Ursulin
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-06-30  3:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
URL   : https://patchwork.freedesktop.org/series/45689/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4409_full -> Patchwork_9489_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9489_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9489_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9489_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_schedule@deep-bsd1:
      shard-kbl:          SKIP -> PASS +1

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render:
      shard-snb:          PASS -> SKIP +3

    
== Known issues ==

  Here are the changes found in Patchwork_9489_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_gtt:
      shard-apl:          PASS -> FAIL (fdo#105347)

    igt@drv_selftest@mock_scatterlist:
      shard-kbl:          NOTRUN -> DMESG-WARN (fdo#103667)

    igt@drv_suspend@shrink:
      shard-snb:          PASS -> FAIL (fdo#106886)

    igt@gem_tiled_fence_blits@normal:
      shard-snb:          NOTRUN -> INCOMPLETE (fdo#105411)

    igt@kms_flip@2x-modeset-vs-vblank-race-interruptible:
      shard-hsw:          PASS -> FAIL (fdo#103060)

    igt@kms_flip@plain-flip-fb-recreate-interruptible:
      shard-glk:          PASS -> FAIL (fdo#100368) +1

    
    ==== Possible fixes ====

    igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
      shard-glk:          FAIL (fdo#102887) -> PASS

    igt@kms_flip_tiling@flip-to-x-tiled:
      shard-glk:          FAIL (fdo#104724, fdo#103822) -> PASS

    igt@perf_pmu@busy-idle-no-semaphores-vecs0:
      shard-snb:          INCOMPLETE (fdo#105411) -> SKIP

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
  fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
  fdo#103667 https://bugs.freedesktop.org/show_bug.cgi?id=103667
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105347 https://bugs.freedesktop.org/show_bug.cgi?id=105347
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4409 -> Patchwork_9489

  CI_DRM_4409: bf99024d9c80d81968d3621ead0c0c05343fe826 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4532: 840d12e2f050b784552197403d6575a57b6e896d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9489: 9c4a8cd83dff530c64eeffd8444ab89fa6221025 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9489/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (7 preceding siblings ...)
  2018-06-30  3:03 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-07-02 11:34 ` Tvrtko Ursulin
  2018-07-02 11:44   ` Chris Wilson
  2018-07-04  8:52 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2) Patchwork
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02 11:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 23:54, Chris Wilson wrote:
> Currently all callers are responsible for adding the vma to the active
> timeline and then exporting its fence. Combine the two operations into
> i915_vma_move_to_active() to move all the extra handling from the
> callers to the single site.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 47 +++++++++----------
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |  4 --
>   .../drm/i915/selftests/i915_gem_coherency.c   |  4 --
>   .../gpu/drm/i915/selftests/i915_gem_context.c |  4 --
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  4 --
>   5 files changed, 21 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index c2dd9b4cdace..91f20445147f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1166,15 +1166,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   
>   	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
>   	i915_vma_move_to_active(batch, rq, 0);
> -	reservation_object_lock(batch->resv, NULL);
> -	reservation_object_add_excl_fence(batch->resv, &rq->fence);
> -	reservation_object_unlock(batch->resv);
>   	i915_vma_unpin(batch);
>   
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> -	reservation_object_lock(vma->resv, NULL);
> -	reservation_object_add_excl_fence(vma->resv, &rq->fence);
> -	reservation_object_unlock(vma->resv);
>   
>   	rq->batch = batch;
>   
> @@ -1771,25 +1765,6 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   	return eb_relocate_slow(eb);
>   }
>   
> -static void eb_export_fence(struct i915_vma *vma,
> -			    struct i915_request *rq,
> -			    unsigned int flags)
> -{
> -	struct reservation_object *resv = vma->resv;
> -
> -	/*
> -	 * Ignore errors from failing to allocate the new fence, we can't
> -	 * handle an error right now. Worst case should be missed
> -	 * synchronisation leading to rendering corruption.
> -	 */
> -	reservation_object_lock(resv, NULL);
> -	if (flags & EXEC_OBJECT_WRITE)
> -		reservation_object_add_excl_fence(resv, &rq->fence);
> -	else if (reservation_object_reserve_shared(resv) == 0)
> -		reservation_object_add_shared_fence(resv, &rq->fence);
> -	reservation_object_unlock(resv);
> -}
> -
>   static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   {
>   	const unsigned int count = eb->buffer_count;
> @@ -1844,7 +1819,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   		struct i915_vma *vma = eb->vma[i];
>   
>   		i915_vma_move_to_active(vma, eb->request, flags);
> -		eb_export_fence(vma, eb->request, flags);
>   
>   		__eb_unreserve_vma(vma, flags);
>   		vma->exec_flags = NULL;
> @@ -1884,6 +1858,25 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
>   	return true;
>   }
>   
> +static void export_fence(struct i915_vma *vma,
> +			 struct i915_request *rq,
> +			 unsigned int flags)
> +{
> +	struct reservation_object *resv = vma->resv;
> +
> +	/*
> +	 * Ignore errors from failing to allocate the new fence, we can't
> +	 * handle an error right now. Worst case should be missed
> +	 * synchronisation leading to rendering corruption.
> +	 */
> +	reservation_object_lock(resv, NULL);
> +	if (flags & EXEC_OBJECT_WRITE)
> +		reservation_object_add_excl_fence(resv, &rq->fence);
> +	else if (reservation_object_reserve_shared(resv) == 0)
> +		reservation_object_add_shared_fence(resv, &rq->fence);
> +	reservation_object_unlock(resv);
> +}
> +
>   void i915_vma_move_to_active(struct i915_vma *vma,
>   			     struct i915_request *rq,
>   			     unsigned int flags)
> @@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   
>   	if (flags & EXEC_OBJECT_NEEDS_FENCE)
>   		i915_gem_active_set(&vma->last_fence, rq);
> +
> +	export_fence(vma, rq, flags);

Don't care about new extra reservation object operations from the other 
i915_move_to_active_callers?

Regards,

Tvrtko

>   }
>   
>   static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index b5e87fcdcdae..358fc81f6c99 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -998,10 +998,6 @@ static int gpu_write(struct i915_vma *vma,
>   
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   
> -	reservation_object_lock(vma->resv, NULL);
> -	reservation_object_add_excl_fence(vma->resv, &rq->fence);
> -	reservation_object_unlock(vma->resv);
> -
>   err_request:
>   	i915_request_add(rq);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index a4900091ae3d..11427aae0853 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -225,10 +225,6 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	i915_vma_unpin(vma);
>   
> -	reservation_object_lock(obj->resv, NULL);
> -	reservation_object_add_excl_fence(obj->resv, &rq->fence);
> -	reservation_object_unlock(obj->resv);
> -
>   	i915_request_add(rq);
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index cc848ceeb3c3..81ed87aa0a4d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -178,10 +178,6 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	i915_vma_unpin(vma);
>   
> -	reservation_object_lock(obj->resv, NULL);
> -	reservation_object_add_excl_fence(obj->resv, &rq->fence);
> -	reservation_object_unlock(obj->resv);
> -
>   	i915_request_add(rq);
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 77dd7a510ea6..fa5a0654314a 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -456,10 +456,6 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
>   
>   	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   
> -	reservation_object_lock(vma->resv, NULL);
> -	reservation_object_add_excl_fence(vma->resv, &rq->fence);
> -	reservation_object_unlock(vma->resv);
> -
>   	i915_request_add(rq);
>   
>   	i915_gem_object_set_active_reference(obj);
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/6] drm/i915: Export i915_request_skip()
  2018-06-29 22:54 ` [PATCH 2/6] drm/i915: Export i915_request_skip() Chris Wilson
@ 2018-07-02 11:37   ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02 11:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 23:54, Chris Wilson wrote:
> In the next patch, we will want to start skipping requests on failing to
> complete their payloads. So export the utility function currently used to
> make requests inoperable following a failed gpu reset.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c     | 25 +++----------------------
>   drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_request.h |  2 ++
>   3 files changed, 26 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8954db6ab083..2ff6251483ce 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3074,25 +3074,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
>   	return err;
>   }
>   
> -static void skip_request(struct i915_request *request)
> -{
> -	void *vaddr = request->ring->vaddr;
> -	u32 head;
> -
> -	/* As this request likely depends on state from the lost
> -	 * context, clear out all the user operations leaving the
> -	 * breadcrumb at the end (so we get the fence notifications).
> -	 */
> -	head = request->head;
> -	if (request->postfix < head) {
> -		memset(vaddr + head, 0, request->ring->size - head);
> -		head = 0;
> -	}
> -	memset(vaddr + head, 0, request->postfix - head);
> -
> -	dma_fence_set_error(&request->fence, -EIO);
> -}
> -
>   static void engine_skip_context(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
> @@ -3107,10 +3088,10 @@ static void engine_skip_context(struct i915_request *request)
>   
>   	list_for_each_entry_continue(request, &engine->timeline.requests, link)
>   		if (request->gem_context == hung_ctx)
> -			skip_request(request);
> +			i915_request_skip(request, -EIO);
>   
>   	list_for_each_entry(request, &timeline->requests, link)
> -		skip_request(request);
> +		i915_request_skip(request, -EIO);
>   
>   	spin_unlock(&timeline->lock);
>   	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> @@ -3153,7 +3134,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
>   
>   	if (stalled) {
>   		i915_gem_context_mark_guilty(request->gem_context);
> -		skip_request(request);
> +		i915_request_skip(request, -EIO);
>   
>   		/* If this context is now banned, skip all pending requests. */
>   		if (i915_gem_context_is_banned(request->gem_context))
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index a2f7e9358450..7ae08b68121e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1013,6 +1013,27 @@ i915_request_await_object(struct i915_request *to,
>   	return ret;
>   }
>   
> +void i915_request_skip(struct i915_request *rq, int error)
> +{
> +	void *vaddr = rq->ring->vaddr;
> +	u32 head;
> +
> +	GEM_BUG_ON(!IS_ERR_VALUE((long)error));

This gets checked by dma_fence_set_error below but if you want to have 
it explicitly spelled out here OK.

> +	dma_fence_set_error(&rq->fence, error);
> +
> +	/*
> +	 * As this request likely depends on state from the lost
> +	 * context, clear out all the user operations leaving the
> +	 * breadcrumb at the end (so we get the fence notifications).
> +	 */
> +	head = rq->infix;
> +	if (rq->postfix < head) {
> +		memset(vaddr + head, 0, rq->ring->size - head);
> +		head = 0;
> +	}
> +	memset(vaddr + head, 0, rq->postfix - head);
> +}
> +
>   /*
>    * NB: This function is not allowed to fail. Doing so would mean the the
>    * request is not being tracked for completion but the work itself is
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 7ee220ded9c9..a355a081485f 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq);
>   void __i915_request_submit(struct i915_request *request);
>   void i915_request_submit(struct i915_request *request);
>   
> +void i915_request_skip(struct i915_request *request, int error);
> +
>   void __i915_request_unsubmit(struct i915_request *request);
>   void i915_request_unsubmit(struct i915_request *request);
>   
> 

With sounds of grumbling for code movement with changes:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 3/6] drm/i915: Start returning an error from i915_vma_move_to_active()
  2018-06-29 22:54 ` [PATCH 3/6] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
@ 2018-07-02 11:41   ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02 11:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 23:54, Chris Wilson wrote:
> Handling such a late error in request construction is tricky, but to
> accommodate future patches which may allocate here, we potentially could
> err. To handle the error after already adjusting global state to track
> the new request, we must finish and submit the request. But we don't
> want to use the request as not everything is being tracked by it, so we
> opt to cancel the commands inside the request.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gvt/scheduler.c          |  6 ++++-
>   drivers/gpu/drm/i915/i915_drv.h               |  6 ++---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 25 +++++++++++++------
>   drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |  9 +++++--
>   .../drm/i915/selftests/i915_gem_coherency.c   |  4 +--
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++++--
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  7 +++---
>   drivers/gpu/drm/i915/selftests/i915_request.c |  8 ++++--
>   .../gpu/drm/i915/selftests/intel_hangcheck.c  | 11 ++++++--
>   drivers/gpu/drm/i915/selftests/intel_lrc.c    | 11 ++++++--
>   .../drm/i915/selftests/intel_workarounds.c    |  5 +++-
>   12 files changed, 78 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 928818f218f7..b0e566956b8d 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -476,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
>   			i915_gem_obj_finish_shmem_access(bb->obj);
>   			bb->accessing = false;
>   
> -			i915_vma_move_to_active(bb->vma, workload->req, 0);
> +			ret = i915_vma_move_to_active(bb->vma,
> +						      workload->req,
> +						      0);
> +			if (ret)
> +				goto err;
>   		}
>   	}
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ce7d06332884..cd8f69a00e86 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3099,9 +3099,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
>   }
>   
>   int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> -void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct i915_request *rq,
> -			     unsigned int flags);
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
>   int i915_gem_dumb_create(struct drm_file *file_priv,
>   			 struct drm_device *dev,
>   			 struct drm_mode_create_dumb *args);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 91f20445147f..97136e4ce91d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1165,12 +1165,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   		goto err_request;
>   
>   	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
> -	i915_vma_move_to_active(batch, rq, 0);
> -	i915_vma_unpin(batch);
> +	err = i915_vma_move_to_active(batch, rq, 0);
> +	if (err)
> +		goto skip_request;
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		goto skip_request;
>   
>   	rq->batch = batch;
> +	i915_vma_unpin(batch);
>   
>   	cache->rq = rq;
>   	cache->rq_cmd = cmd;
> @@ -1179,6 +1183,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>   	/* Return with batch mapping (cmd) still pinned */
>   	return 0;
>   
> +skip_request:
> +	i915_request_skip(rq, err);
>   err_request:
>   	i915_request_add(rq);
>   err_unpin:
> @@ -1818,7 +1824,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>   		unsigned int flags = eb->flags[i];
>   		struct i915_vma *vma = eb->vma[i];
>   
> -		i915_vma_move_to_active(vma, eb->request, flags);
> +		err = i915_vma_move_to_active(vma, eb->request, flags);
> +		if (unlikely(err)) {
> +			i915_request_skip(eb->request, err);
> +			return err;
> +		}
>   
>   		__eb_unreserve_vma(vma, flags);
>   		vma->exec_flags = NULL;
> @@ -1877,9 +1887,9 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> -void i915_vma_move_to_active(struct i915_vma *vma,
> -			     struct i915_request *rq,
> -			     unsigned int flags)
> +int i915_vma_move_to_active(struct i915_vma *vma,
> +			    struct i915_request *rq,
> +			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
>   	const unsigned int idx = rq->engine->id;
> @@ -1916,6 +1926,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   		i915_gem_active_set(&vma->last_fence, rq);
>   
>   	export_fence(vma, rq, flags);
> +	return 0;
>   }
>   
>   static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 3210cedfa46c..90baf9086d0a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct i915_request *rq)
>   			goto err_unpin;
>   	}
>   
> -	i915_vma_move_to_active(so.vma, rq, 0);
> +	err = i915_vma_move_to_active(so.vma, rq, 0);
>   err_unpin:
>   	i915_vma_unpin(so.vma);
>   err_vma:
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 358fc81f6c99..d33e20940e0a 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma,
>   		goto err_request;
>   	}
>   
> -	i915_vma_move_to_active(batch, rq, 0);
> +	err = i915_vma_move_to_active(batch, rq, 0);
> +	if (err)
> +		goto err_request;
> +
>   	i915_gem_object_set_active_reference(batch->obj);
>   	i915_vma_unpin(batch);
>   	i915_vma_close(batch);
> @@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma,
>   	if (err)
>   		goto err_request;
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		i915_request_skip(rq, err);
>   
>   err_request:
>   	i915_request_add(rq);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> index 11427aae0853..328585459c67 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
> @@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
>   	}
>   	intel_ring_advance(rq, cs);
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	i915_vma_unpin(vma);
>   
>   	i915_request_add(rq);
>   
> -	return 0;
> +	return err;
>   }
>   
>   static bool always_valid(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 81ed87aa0a4d..da43f0a99eb2 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -170,18 +170,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
>   	if (err)
>   		goto err_request;
>   
> -	i915_vma_move_to_active(batch, rq, 0);
> +	err = i915_vma_move_to_active(batch, rq, 0);
> +	if (err)
> +		goto skip_request;
> +
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		goto skip_request;
> +
>   	i915_gem_object_set_active_reference(batch->obj);
>   	i915_vma_unpin(batch);
>   	i915_vma_close(batch);
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	i915_vma_unpin(vma);
>   
>   	i915_request_add(rq);
>   
>   	return 0;
>   
> +skip_request:
> +	i915_request_skip(rq, err);
>   err_request:
>   	i915_request_add(rq);
>   err_batch:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index fa5a0654314a..7d0ddef1519b 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -454,13 +454,14 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
>   		return PTR_ERR(rq);
>   	}
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   
>   	i915_request_add(rq);
>   
> -	i915_gem_object_set_active_reference(obj);
> +	__i915_gem_object_release_unless_active(obj);
>   	i915_vma_unpin(vma);
> -	return 0;
> +
> +	return err;
>   }
>   
>   static bool assert_mmap_offset(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
> index 521ae4a90ddf..c27f77a24ce0 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_request.c
> @@ -678,7 +678,9 @@ static int live_all_engines(void *arg)
>   			i915_gem_object_set_active_reference(batch->obj);
>   		}
>   
> -		i915_vma_move_to_active(batch, request[id], 0);
> +		err = i915_vma_move_to_active(batch, request[id], 0);
> +		GEM_BUG_ON(err);
> +
>   		i915_request_get(request[id]);
>   		i915_request_add(request[id]);
>   	}
> @@ -788,7 +790,9 @@ static int live_sequential_engines(void *arg)
>   		GEM_BUG_ON(err);
>   		request[id]->batch = batch;
>   
> -		i915_vma_move_to_active(batch, request[id], 0);
> +		err = i915_vma_move_to_active(batch, request[id], 0);
> +		GEM_BUG_ON(err);
> +
>   		i915_gem_object_set_active_reference(batch->obj);
>   		i915_vma_get(batch);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index fe7d3190ebfe..e11df2743704 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -130,13 +130,19 @@ static int emit_recurse_batch(struct hang *h,
>   	if (err)
>   		goto unpin_vma;
>   
> -	i915_vma_move_to_active(vma, rq, 0);
> +	err = i915_vma_move_to_active(vma, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(vma->obj)) {
>   		i915_gem_object_get(vma->obj);
>   		i915_gem_object_set_active_reference(vma->obj);
>   	}
>   
> -	i915_vma_move_to_active(hws, rq, 0);
> +	err = i915_vma_move_to_active(hws, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(hws->obj)) {
>   		i915_gem_object_get(hws->obj);
>   		i915_gem_object_set_active_reference(hws->obj);
> @@ -205,6 +211,7 @@ static int emit_recurse_batch(struct hang *h,
>   
>   	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
>   
> +unpin_hws:
>   	i915_vma_unpin(hws);
>   unpin_vma:
>   	i915_vma_unpin(vma);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index ea27c7cfbf96..b34f7ac6631e 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -104,13 +104,19 @@ static int emit_recurse_batch(struct spinner *spin,
>   	if (err)
>   		goto unpin_vma;
>   
> -	i915_vma_move_to_active(vma, rq, 0);
> +	err = i915_vma_move_to_active(vma, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(vma->obj)) {
>   		i915_gem_object_get(vma->obj);
>   		i915_gem_object_set_active_reference(vma->obj);
>   	}
>   
> -	i915_vma_move_to_active(hws, rq, 0);
> +	err = i915_vma_move_to_active(hws, rq, 0);
> +	if (err)
> +		goto unpin_hws;
> +
>   	if (!i915_gem_object_has_active_reference(hws->obj)) {
>   		i915_gem_object_get(hws->obj);
>   		i915_gem_object_set_active_reference(hws->obj);
> @@ -134,6 +140,7 @@ static int emit_recurse_batch(struct spinner *spin,
>   
>   	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
>   
> +unpin_hws:
>   	i915_vma_unpin(hws);
>   unpin_vma:
>   	i915_vma_unpin(vma);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index e1ea2d2bedd2..c100153cb494 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -49,6 +49,10 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   		goto err_pin;
>   	}
>   
> +	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +	if (err)
> +		goto err_req;
> +
>   	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>   	if (INTEL_GEN(ctx->i915) >= 8)
>   		srm++;
> @@ -67,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
>   	}
>   	intel_ring_advance(rq, cs);
>   
> -	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
>   	reservation_object_lock(vma->resv, NULL);
>   	reservation_object_add_excl_fence(vma->resv, &rq->fence);
>   	reservation_object_unlock(vma->resv);

Hm, the above three lines should have been nuked in the export_fence merge
with the i915_vma_move_to_active patch...

Otherwise, gave r-b for this one already:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/6] drm/i915: Move i915_vma_move_to_active() to i915_vma.c
  2018-06-29 22:54 ` [PATCH 4/6] drm/i915: Move i915_vma_move_to_active() to i915_vma.c Chris Wilson
@ 2018-07-02 11:41   ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02 11:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 23:54, Chris Wilson wrote:
> i915_vma_move_to_active() has grown beyond its execbuf origins, and
> should take its rightful place in i915_vma.c as a method for i915_vma!
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h            |  3 --
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 61 ----------------------
>   drivers/gpu/drm/i915/i915_vma.c            | 61 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_vma.h            |  4 ++
>   4 files changed, 65 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cd8f69a00e86..1c04872890d4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3099,9 +3099,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
>   }
>   
>   int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> -					 struct i915_request *rq,
> -					 unsigned int flags);
>   int i915_gem_dumb_create(struct drm_file *file_priv,
>   			 struct drm_device *dev,
>   			 struct drm_mode_create_dumb *args);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 97136e4ce91d..3f0c612d42e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1868,67 +1868,6 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
>   	return true;
>   }
>   
> -static void export_fence(struct i915_vma *vma,
> -			 struct i915_request *rq,
> -			 unsigned int flags)
> -{
> -	struct reservation_object *resv = vma->resv;
> -
> -	/*
> -	 * Ignore errors from failing to allocate the new fence, we can't
> -	 * handle an error right now. Worst case should be missed
> -	 * synchronisation leading to rendering corruption.
> -	 */
> -	reservation_object_lock(resv, NULL);
> -	if (flags & EXEC_OBJECT_WRITE)
> -		reservation_object_add_excl_fence(resv, &rq->fence);
> -	else if (reservation_object_reserve_shared(resv) == 0)
> -		reservation_object_add_shared_fence(resv, &rq->fence);
> -	reservation_object_unlock(resv);
> -}
> -
> -int i915_vma_move_to_active(struct i915_vma *vma,
> -			    struct i915_request *rq,
> -			    unsigned int flags)
> -{
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned int idx = rq->engine->id;
> -
> -	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> -	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> -
> -	/*
> -	 * Add a reference if we're newly entering the active list.
> -	 * The order in which we add operations to the retirement queue is
> -	 * vital here: mark_active adds to the start of the callback list,
> -	 * such that subsequent callbacks are called first. Therefore we
> -	 * add the active reference first and queue for it to be dropped
> -	 * *last*.
> -	 */
> -	if (!i915_vma_is_active(vma))
> -		obj->active_count++;
> -	i915_vma_set_active(vma, idx);
> -	i915_gem_active_set(&vma->last_read[idx], rq);
> -	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> -
> -	obj->write_domain = 0;
> -	if (flags & EXEC_OBJECT_WRITE) {
> -		obj->write_domain = I915_GEM_DOMAIN_RENDER;
> -
> -		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
> -			i915_gem_active_set(&obj->frontbuffer_write, rq);
> -
> -		obj->read_domains = 0;
> -	}
> -	obj->read_domains |= I915_GEM_GPU_DOMAINS;
> -
> -	if (flags & EXEC_OBJECT_NEEDS_FENCE)
> -		i915_gem_active_set(&vma->last_fence, rq);
> -
> -	export_fence(vma, rq, flags);
> -	return 0;
> -}
> -
>   static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
>   {
>   	u32 *cs;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index d0e606e9b27a..7635c27e7e8b 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -859,6 +859,67 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
>   		list_del(&vma->obj->userfault_link);
>   }
>   
> +static void export_fence(struct i915_vma *vma,
> +			 struct i915_request *rq,
> +			 unsigned int flags)
> +{
> +	struct reservation_object *resv = vma->resv;
> +
> +	/*
> +	 * Ignore errors from failing to allocate the new fence, we can't
> +	 * handle an error right now. Worst case should be missed
> +	 * synchronisation leading to rendering corruption.
> +	 */
> +	reservation_object_lock(resv, NULL);
> +	if (flags & EXEC_OBJECT_WRITE)
> +		reservation_object_add_excl_fence(resv, &rq->fence);
> +	else if (reservation_object_reserve_shared(resv) == 0)
> +		reservation_object_add_shared_fence(resv, &rq->fence);
> +	reservation_object_unlock(resv);
> +}
> +
> +int i915_vma_move_to_active(struct i915_vma *vma,
> +			    struct i915_request *rq,
> +			    unsigned int flags)
> +{
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	const unsigned int idx = rq->engine->id;
> +
> +	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> +	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> +
> +	/*
> +	 * Add a reference if we're newly entering the active list.
> +	 * The order in which we add operations to the retirement queue is
> +	 * vital here: mark_active adds to the start of the callback list,
> +	 * such that subsequent callbacks are called first. Therefore we
> +	 * add the active reference first and queue for it to be dropped
> +	 * *last*.
> +	 */
> +	if (!i915_vma_is_active(vma))
> +		obj->active_count++;
> +	i915_vma_set_active(vma, idx);
> +	i915_gem_active_set(&vma->last_read[idx], rq);
> +	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +
> +	obj->write_domain = 0;
> +	if (flags & EXEC_OBJECT_WRITE) {
> +		obj->write_domain = I915_GEM_DOMAIN_RENDER;
> +
> +		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
> +			i915_gem_active_set(&obj->frontbuffer_write, rq);
> +
> +		obj->read_domains = 0;
> +	}
> +	obj->read_domains |= I915_GEM_GPU_DOMAINS;
> +
> +	if (flags & EXEC_OBJECT_NEEDS_FENCE)
> +		i915_gem_active_set(&vma->last_fence, rq);
> +
> +	export_fence(vma, rq, flags);
> +	return 0;
> +}
> +
>   int i915_vma_unbind(struct i915_vma *vma)
>   {
>   	unsigned long active;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 66a228931517..a218b689e418 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -215,6 +215,10 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
>   	return vma->active & BIT(engine);
>   }
>   
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
> +
>   static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-07-02 11:34 ` [PATCH 1/6] " Tvrtko Ursulin
@ 2018-07-02 11:44   ` Chris Wilson
  2018-07-02 12:29     ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-07-02 11:44 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-07-02 12:34:51)
> > @@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> >   
> >       if (flags & EXEC_OBJECT_NEEDS_FENCE)
> >               i915_gem_active_set(&vma->last_fence, rq);
> > +
> > +     export_fence(vma, rq, flags);
> 
> Don't care about new extra reservation object operations from the other 
> i915_move_to_active_callers?

No. Not filling the reservation_object is an underhand shortcut. It
should never be not correct to track the fences for an object. Doing it
in one place correctly, should prevent cheating.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active()
  2018-07-02 11:44   ` Chris Wilson
@ 2018-07-02 12:29     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-02 12:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/07/2018 12:44, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-07-02 12:34:51)
>>> @@ -1921,6 +1914,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>>>    
>>>        if (flags & EXEC_OBJECT_NEEDS_FENCE)
>>>                i915_gem_active_set(&vma->last_fence, rq);
>>> +
>>> +     export_fence(vma, rq, flags);
>>
>> Don't care about new extra reservation object operations from the other
>> i915_move_to_active_callers?
> 
> No. Not filling the reservation_object is an underhand shortcut. It
> should never be not correct to track the fences for an object. Doing it
> in one place correctly, should prevent cheating.

Fair enough.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 22:54 ` [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
@ 2018-07-03 17:28   ` Tvrtko Ursulin
  2018-07-03 20:29     ` Chris Wilson
  2018-07-04  9:13   ` [PATCH v3] " Chris Wilson
  1 sibling, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-03 17:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 23:54, Chris Wilson wrote:
> In the next patch, we will want to be able to use more flexible request
> timelines that can hop between engines. From the vma pov, we can then
> not rely on the binding of this request to an engine and so can not
> ensure that different requests are ordered through a per-engine
> timeline, and so we must track activity of all timelines. (We track
> activity on the vma itself to prevent unbinding from HW before the HW
> has finished accessing it.)
> 
> v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
> index is fraught with aliasing of unsigned longs).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
>   drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
>   drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
>   drivers/gpu/drm/i915/i915_request.h   |   1 +
>   drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
>   6 files changed, 98 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6aa761ca085..50fcf74248f2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
> -	int i;
>   
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(size > ggtt->vm.total);
> @@ -2005,8 +2004,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], NULL);

At first I thought you need to initialize the rb tree but figured out 
access to it is guarded by the active_count.

>   	init_request_active(&vma->last_fence, NULL);
>   
>   	vma->vm = &ggtt->vm;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index df524c9cad40..8c81cf3aa182 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>   				struct drm_i915_error_buffer *err,
>   				int count)
>   {
> -	int i;
> -
>   	err_printf(m, "%s [%d]:\n", name, count);
>   
>   	while (count--) {
> -		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> +		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
>   			   upper_32_bits(err->gtt_offset),
>   			   lower_32_bits(err->gtt_offset),
>   			   err->size,
>   			   err->read_domains,
> -			   err->write_domain);
> -		for (i = 0; i < I915_NUM_ENGINES; i++)
> -			err_printf(m, "%02x ", err->rseqno[i]);
> -
> -		err_printf(m, "] %02x", err->wseqno);
> +			   err->write_domain,
> +			   err->wseqno);
>   		err_puts(m, tiling_flag(err->tiling));
>   		err_puts(m, dirty_flag(err->dirty));
>   		err_puts(m, purgeable_flag(err->purgeable));
> @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   		       struct i915_vma *vma)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	int i;
>   
>   	err->size = obj->base.size;
>   	err->name = obj->base.name;
>   
> -	for (i = 0; i < I915_NUM_ENGINES; i++)
> -		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);

We could still flatten the tree and capture the list of 
fence.context:seqno here, to be included in the error state. Or you 
think it is not useful?

>   	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
>   	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 58910f1dc67c..f893a4e8b783 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -177,7 +177,7 @@ struct i915_gpu_state {
>   	struct drm_i915_error_buffer {
>   		u32 size;
>   		u32 name;
> -		u32 rseqno[I915_NUM_ENGINES], wseqno;
> +		u32 wseqno;
>   		u64 gtt_offset;
>   		u32 read_domains;
>   		u32 write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a355a081485f..e1c9365dfefb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -380,6 +380,7 @@ static inline void
>   init_request_active(struct i915_gem_active *active,
>   		    i915_gem_retire_fn retire)
>   {
> +	RCU_INIT_POINTER(active->request, NULL);
>   	INIT_LIST_HEAD(&active->link);
>   	active->retire = retire ?: i915_gem_retire_noop;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 7635c27e7e8b..2faad2a1d00e 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>   
>   #endif
>   
> +struct i915_vma_active {
> +	struct i915_gem_active base;
> +	struct i915_vma *vma;
> +	struct rb_node node;
> +	u64 timeline;

If my quick calculations are correct this is (8 + 16 + 8) + 8 + 20 + 8 = 
68 large - just unluckily over the 64-byte slab so at some point I think 
it will warrant a dedicated slab to avoid wastage.

> +};
> +
>   static void
> -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   {
> -	const unsigned int idx = rq->engine->id;
> -	struct i915_vma *vma =
> -		container_of(active, struct i915_vma, last_read[idx]);
>   	struct drm_i915_gem_object *obj = vma->obj;
>   
> -	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> -	i915_vma_clear_active(vma, idx);
> -	if (i915_vma_is_active(vma))
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	if (--vma->active_count)
>   		return;
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
>   	}
>   }
>   
> +static void
> +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	struct i915_vma_active *active =
> +		container_of(base, typeof(*active), base);
> +
> +	__i915_vma_retire(active->vma, rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
>   {
>   	struct i915_vma *vma;
>   	struct rb_node *rb, **p;
> -	int i;
>   
>   	/* The aliasing_ppgtt should never be used directly! */
>   	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], i915_vma_retire);
> +	vma->active = RB_ROOT;
> +
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
>   static void __i915_vma_destroy(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
> -	int i;
> +	struct i915_vma_active *iter, *n;
>   
>   	GEM_BUG_ON(vma->node.allocated);
>   	GEM_BUG_ON(vma->fence);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
>   	list_del(&vma->obj_link);
> @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   	if (!i915_vma_is_ggtt(vma))
>   		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
>   
> +	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> +		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> +		kfree(iter);
> +	}
> +
>   	kmem_cache_free(i915->vmas, vma);
>   }
>   
> @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)

lookup_or_create? Or get_active?

> +{
> +	struct i915_vma_active *active;
> +	struct rb_node **p, *parent;
> +
> +	parent = NULL;
> +	p = &vma->active.rb_node;
> +	while (*p) {
> +		parent = *p;
> +
> +		active = rb_entry(parent, struct i915_vma_active, node);
> +		if (active->timeline == idx)
> +			return &active->base;
> +
> +		if (active->timeline < idx)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	active = kmalloc(sizeof(*active), GFP_KERNEL);
> +	if (unlikely(!active))
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_request_active(&active->base, i915_vma_retire);
> +	active->vma = vma;
> +	active->timeline = idx;
> +
> +	rb_link_node(&active->node, parent, p);
> +	rb_insert_color(&active->node, &vma->active);
> +
> +	return &active->base;
> +}
> +
>   int i915_vma_move_to_active(struct i915_vma *vma,
>   			    struct i915_request *rq,
>   			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned int idx = rq->engine->id;
> +	struct i915_gem_active *active;
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> +	active = lookup_active(vma, rq->fence.context);
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
> +
>   	/*
>   	 * Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_vma_is_active(vma))
> +	if (!i915_gem_active_isset(active) && !vma->active_count++) {

vma->active_count (which is i915_vma_is_active) check wouldn't be 
enough? Can it be zero with active _set_?

> +		list_move_tail(&vma->vm_link, &vma->vm->active_list);
>   		obj->active_count++;
> -	i915_vma_set_active(vma, idx);
> -	i915_gem_active_set(&vma->last_read[idx], rq);
> -	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +	}
> +	i915_gem_active_set(active, rq);
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	GEM_BUG_ON(!obj->active_count);
>   
>   	obj->write_domain = 0;
>   	if (flags & EXEC_OBJECT_WRITE) {
> @@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   
>   int i915_vma_unbind(struct i915_vma *vma)
>   {
> -	unsigned long active;
>   	int ret;
>   
>   	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	 * have side-effects such as unpinning or even unbinding this vma.
>   	 */
>   	might_sleep();
> -	active = i915_vma_get_active(vma);
> -	if (active) {
> -		int idx;
> +	if (i915_vma_is_active(vma)) {

Guarding the tree with count might become problematic when removing 
struct mutex. So perhaps it would better to rely that the tree can 
always be iterated, and when it is empty it is automatically a no-op.

> +		struct i915_vma_active *active, *n;
>   
>   		/*
>   		 * When a closed VMA is retired, it is unbound - eek.
> @@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> -		for_each_active(active, idx) {
> -			ret = i915_gem_active_retire(&vma->last_read[idx],
> +		rbtree_postorder_for_each_entry_safe(active, n,
> +						     &vma->active, node) {
> +			ret = i915_gem_active_retire(&active->base,
>   						     &vma->vm->i915->drm.struct_mutex);
>   			if (ret)
> -				break;
> -		}
> -
> -		if (!ret) {
> -			ret = i915_gem_active_retire(&vma->last_fence,
> -						     &vma->vm->i915->drm.struct_mutex);
> +				goto unpin;
>   		}
>   
> +		ret = i915_gem_active_retire(&vma->last_fence,
> +					     &vma->vm->i915->drm.struct_mutex);
> +unpin:
>   		__i915_vma_unpin(vma);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index a218b689e418..c297b0a0dc47 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -26,6 +26,7 @@
>   #define __I915_VMA_H__
>   
>   #include <linux/io-mapping.h>
> +#include <linux/rbtree.h>
>   
>   #include <drm/drm_mm.h>
>   
> @@ -94,8 +95,8 @@ struct i915_vma {
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
>   #define I915_VMA_GGTT_WRITE	BIT(12)
>   
> -	unsigned int active;
> -	struct i915_gem_active last_read[I915_NUM_ENGINES];
> +	unsigned int active_count;
> +	struct rb_root active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma);
>   
> +static inline bool i915_vma_is_active(struct i915_vma *vma)
> +{
> +	return vma->active_count;
> +}
> +
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
> +
>   static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
>   {
>   	return vma->flags & I915_VMA_GGTT;
> @@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
>   	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>   }
>   
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> -	return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> -	return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> -				       unsigned int engine)
> -{
> -	vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> -					 unsigned int engine)
> -{
> -	vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> -					      unsigned int engine)
> -{
> -	return vma->active & BIT(engine);
> -}
> -
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> -					 struct i915_request *rq,
> -					 unsigned int flags);
> -
>   static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> 

Looks fine in general.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma
  2018-06-29 22:54 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
@ 2018-07-03 17:40   ` Tvrtko Ursulin
  2018-07-04  8:34   ` [PATCH v2] " Chris Wilson
  1 sibling, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-03 17:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 29/06/2018 23:54, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the radixtree if

Is a rbtree now.

> we need a second or more concurrent active trackers.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 36 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>   2 files changed, 35 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 2faad2a1d00e..9b69d24c5cf1 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>   	__i915_vma_retire(active->vma, rq);
>   }
>   
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   
>   	vma->active = RB_ROOT;
>   
> +	init_request_active(&vma->last_active, i915_vma_last_retire);
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -895,6 +902,15 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   {
>   	struct i915_vma_active *active;
>   	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	old = i915_gem_active_raw(&vma->last_active,
> +				  &vma->vm->i915->drm.struct_mutex);
> +	if (!old || old->fence.context == idx)
> +		goto out;

Put a comment for this block explaining the caching optimization.

> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;

I find "currently active fence" a bit confusing. It is just the "last 
accessed/cached fence", no?

>   
>   	parent = NULL;
>   	p = &vma->active.rb_node;
> @@ -903,7 +919,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   
>   		active = rb_entry(parent, struct i915_vma_active, node);
>   		if (active->timeline == idx)
> -			return &active->base;
> +			goto replace;
>   
>   		if (active->timeline < idx)
>   			p = &parent->rb_right;
> @@ -922,7 +938,18 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   	rb_link_node(&active->node, parent, p);
>   	rb_insert_color(&active->node, &vma->active);
>   
> -	return &active->base;
> +replace:
> +	if (i915_gem_active_isset(&active->base)) {
> +		__list_del_entry(&active->base.link);
> +		vma->active_count--;
> +		GEM_BUG_ON(!vma->active_count);
> +	}

When does this trigger? Please put a comment for this block.

> +	GEM_BUG_ON(list_empty(&vma->last_active.link));
> +	list_replace_init(&vma->last_active.link, &active->base.link);
> +	active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> +	return &vma->last_active;
>   }
>   
>   int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1029,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> +		ret = i915_gem_active_retire(&vma->last_active,
> +					     &vma->vm->i915->drm.struct_mutex);

Dang.. there goes my idea to loop/iterate purely over the tree..

> +		if (ret)
> +			goto unpin;
> +
>   		rbtree_postorder_for_each_entry_safe(active, n,
>   						     &vma->active, node) {
>   			ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>   
>   	unsigned int active_count;
>   	struct rb_root active;
> +	struct i915_gem_active last_active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> 

Okay I get the idea and it is a good optimisation for the common case.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine
  2018-07-03 17:28   ` Tvrtko Ursulin
@ 2018-07-03 20:29     ` Chris Wilson
  2018-07-04  9:43       ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-07-03 20:29 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-07-03 18:28:31)
> 
> On 29/06/2018 23:54, Chris Wilson wrote:
> > In the next patch, we will want to be able to use more flexible request
> > timelines that can hop between engines. From the vma pov, we can then
> > not rely on the binding of this request to an engine and so can not
> > ensure that different requests are ordered through a per-engine
> > timeline, and so we must track activity of all timelines. (We track
> > activity on the vma itself to prevent unbinding from HW before the HW
> > has finished accessing it.)
> > 
> > v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
> > index is fraught with aliasing of unsigned longs).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_gtt.c   |   3 -
> >   drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
> >   drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
> >   drivers/gpu/drm/i915/i915_request.h   |   1 +
> >   drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
> >   drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
> >   6 files changed, 98 insertions(+), 80 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index c6aa761ca085..50fcf74248f2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> >       struct drm_i915_private *i915 = ppgtt->base.vm.i915;
> >       struct i915_ggtt *ggtt = &i915->ggtt;
> >       struct i915_vma *vma;
> > -     int i;
> >   
> >       GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
> >       GEM_BUG_ON(size > ggtt->vm.total);
> > @@ -2005,8 +2004,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
> >       if (!vma)
> >               return ERR_PTR(-ENOMEM);
> >   
> > -     for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> > -             init_request_active(&vma->last_read[i], NULL);
> 
> At first I thought you need to initialize the rb tree but figured out 
> access to it is guarded by the active_count.

Or just NULL initialised. Oversight / rebase errors, it should have been
vma->active = RB_ROOT, looks like I skipped the radixtree init in earlier
versions.

> >       init_request_active(&vma->last_fence, NULL);
> >   
> >       vma->vm = &ggtt->vm;
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index df524c9cad40..8c81cf3aa182 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
> >                               struct drm_i915_error_buffer *err,
> >                               int count)
> >   {
> > -     int i;
> > -
> >       err_printf(m, "%s [%d]:\n", name, count);
> >   
> >       while (count--) {
> > -             err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> > +             err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
> >                          upper_32_bits(err->gtt_offset),
> >                          lower_32_bits(err->gtt_offset),
> >                          err->size,
> >                          err->read_domains,
> > -                        err->write_domain);
> > -             for (i = 0; i < I915_NUM_ENGINES; i++)
> > -                     err_printf(m, "%02x ", err->rseqno[i]);
> > -
> > -             err_printf(m, "] %02x", err->wseqno);
> > +                        err->write_domain,
> > +                        err->wseqno);
> >               err_puts(m, tiling_flag(err->tiling));
> >               err_puts(m, dirty_flag(err->dirty));
> >               err_puts(m, purgeable_flag(err->purgeable));
> > @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> >                      struct i915_vma *vma)
> >   {
> >       struct drm_i915_gem_object *obj = vma->obj;
> > -     int i;
> >   
> >       err->size = obj->base.size;
> >       err->name = obj->base.name;
> >   
> > -     for (i = 0; i < I915_NUM_ENGINES; i++)
> > -             err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
> 
> We could still flatten the tree and capture the list of 
> fence.context:seqno here, to be included in the error state. Or you 
> think it is not useful?

I haven't looked at it for a long time, it's just noise to me at the
moment. Yes, we could flatten the tree, but to be honest I just took the
opportunity to kill it. (Who wants to add a variable array to our
unstructured output, along with trying to explain which timelines the
vma are active on.)

What I use the vma for in the error state is for working out what
buffers are active, and if the batch buffer tallies. (Severe errors
where userspace lied, it's anybody's guess.) The active seqno isn't
useful in this regard, since it almost always tells us of a later
request in the queue at the time of the hang. What we don't have now (or
after this patch) is the precise list of vma/buffers userspace sent
along with the batch. But it's never been critical enough to worry,
since the list of active is close enough.

> >       err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
> >       err->engine = __active_get_engine_id(&obj->frontbuffer_write);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> > index 58910f1dc67c..f893a4e8b783 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> > @@ -177,7 +177,7 @@ struct i915_gpu_state {
> >       struct drm_i915_error_buffer {
> >               u32 size;
> >               u32 name;
> > -             u32 rseqno[I915_NUM_ENGINES], wseqno;
> > +             u32 wseqno;
> >               u64 gtt_offset;
> >               u32 read_domains;
> >               u32 write_domain;
> > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> > index a355a081485f..e1c9365dfefb 100644
> > --- a/drivers/gpu/drm/i915/i915_request.h
> > +++ b/drivers/gpu/drm/i915/i915_request.h
> > @@ -380,6 +380,7 @@ static inline void
> >   init_request_active(struct i915_gem_active *active,
> >                   i915_gem_retire_fn retire)
> >   {
> > +     RCU_INIT_POINTER(active->request, NULL);
> >       INIT_LIST_HEAD(&active->link);
> >       active->retire = retire ?: i915_gem_retire_noop;
> >   }
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 7635c27e7e8b..2faad2a1d00e 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
> >   
> >   #endif
> >   
> > +struct i915_vma_active {
> > +     struct i915_gem_active base;
> > +     struct i915_vma *vma;
> > +     struct rb_node node;
> > +     u64 timeline;
> 
> If my quick calculations are correct this is (8 + 16 + 8) + 8 + 20 + 8 = 
> 68 large - just unluckily over the 64-byte slab so at some point I think 
> it will warrant a dedicated slab to avoid wastage.

Hmm, isn't it 7 pointers + a u64.

 sizeof(i915_vma_active)=72
 offsetofend(i915_vma_active.base)=32
 offsetofend(i915_vma_active.vma)=40
 offsetofend(i915_vma_active.node)=64
 offsetofend(i915_vma_active.timeline)=72

Bah i915_gem_active is bigger than I remember.

> >   static void
> > -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> > +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
> >   {
> > -     const unsigned int idx = rq->engine->id;
> > -     struct i915_vma *vma =
> > -             container_of(active, struct i915_vma, last_read[idx]);
> >       struct drm_i915_gem_object *obj = vma->obj;
> >   
> > -     GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> > -
> > -     i915_vma_clear_active(vma, idx);
> > -     if (i915_vma_is_active(vma))
> > +     GEM_BUG_ON(!i915_vma_is_active(vma));
> > +     if (--vma->active_count)
> >               return;
> >   
> >       GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> > @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> >       }
> >   }
> >   
> > +static void
> > +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> > +{
> > +     struct i915_vma_active *active =
> > +             container_of(base, typeof(*active), base);
> > +
> > +     __i915_vma_retire(active->vma, rq);
> > +}
> > +
> >   static struct i915_vma *
> >   vma_create(struct drm_i915_gem_object *obj,
> >          struct i915_address_space *vm,
> > @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
> >   {
> >       struct i915_vma *vma;
> >       struct rb_node *rb, **p;
> > -     int i;
> >   
> >       /* The aliasing_ppgtt should never be used directly! */
> >       GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> > @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
> >       if (vma == NULL)
> >               return ERR_PTR(-ENOMEM);
> >   
> > -     for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> > -             init_request_active(&vma->last_read[i], i915_vma_retire);
> > +     vma->active = RB_ROOT;
> > +
> >       init_request_active(&vma->last_fence, NULL);
> >       vma->vm = vm;
> >       vma->ops = &vm->vma_ops;
> > @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
> >   static void __i915_vma_destroy(struct i915_vma *vma)
> >   {
> >       struct drm_i915_private *i915 = vma->vm->i915;
> > -     int i;
> > +     struct i915_vma_active *iter, *n;
> >   
> >       GEM_BUG_ON(vma->node.allocated);
> >       GEM_BUG_ON(vma->fence);
> >   
> > -     for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> > -             GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
> >       GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
> >   
> >       list_del(&vma->obj_link);
> > @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
> >       if (!i915_vma_is_ggtt(vma))
> >               i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
> >   
> > +     rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> > +             GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> > +             kfree(iter);
> > +     }
> > +
> >       kmem_cache_free(i915->vmas, vma);
> >   }
> >   
> > @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
> >       reservation_object_unlock(resv);
> >   }
> >   
> > +static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> 
> lookup_or_create? Or get_active?

If we're having this argument, active_instance. It's what we settled on
last time for i915_gem_object_lookup_vma_or_create.

> >       /*
> >        * Add a reference if we're newly entering the active list.
> >        * The order in which we add operations to the retirement queue is
> > @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> >        * add the active reference first and queue for it to be dropped
> >        * *last*.
> >        */
> > -     if (!i915_vma_is_active(vma))
> > +     if (!i915_gem_active_isset(active) && !vma->active_count++) {
> 
> vma->active_count (which is i915_vma_is_active) check wouldn't be 
> enough? Can it be zero with active _set_?

No, in the next patch we can have active_count with vma->active unset.

> > @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> >        * have side-effects such as unpinning or even unbinding this vma.
> >        */
> >       might_sleep();
> > -     active = i915_vma_get_active(vma);
> > -     if (active) {
> > -             int idx;
> > +     if (i915_vma_is_active(vma)) {
> 
> Guarding the tree with count might become problematic when removing 
> struct mutex. So perhaps it would better to rely that the tree can 
> always be iterated, and when it is empty it is automatically a no-op.

Iterating the tree still has the same serialisation requirement, so it
doesn't become easier just because we remove vma->active_count. The
vma->active + vma->active_count is guarded by vm->mutex (details to be
thoroughly tested).

The global request lock is an unsolved problem. Close to it being the
last problem (other than trying to recover the single client perf loss).

I'm not sure if i915_gem_active can survive in its current form.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-06-29 22:54 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
  2018-07-03 17:40   ` Tvrtko Ursulin
@ 2018-07-04  8:34   ` Chris Wilson
  2018-07-04  9:39     ` Tvrtko Ursulin
  2018-07-05 11:38     ` Tvrtko Ursulin
  1 sibling, 2 replies; 40+ messages in thread
From: Chris Wilson @ 2018-07-04  8:34 UTC (permalink / raw)
  To: intel-gfx

Using a VMA on more than one timeline concurrently is the exception
rather than the rule (using it concurrently on multiple engines). As we
expect to only use one active tracker, store the most recently used
tracker inside the i915_vma itself and only fallback to the rbtree if
we need a second or more concurrent active trackers.

v2: Comments on how we overwrite any existing last_active cache.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index cd94ffc7f079..33925e00f7e8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(active->vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	vma->active = RB_ROOT;
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
 	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	/*
+	 * We track the most recently used timeline to skip a rbtree search
+	 * for the common case, under typical loads we never need the rbtree
+	 * at all. We can reuse the last_active slot if it is empty, that is
+	 * after the previous activity has been retired, or if the active
+	 * matches the current timeline.
+	 */
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
 
 	parent = NULL;
 	p = &vma->active.rb_node;
@@ -903,7 +926,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 
 		active = rb_entry(parent, struct i915_vma_active, node);
 		if (active->timeline == idx)
-			return &active->base;
+			goto replace;
 
 		if (active->timeline < idx)
 			p = &parent->rb_right;
@@ -922,7 +945,25 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
 	rb_link_node(&active->node, parent, p);
 	rb_insert_color(&active->node, &vma->active);
 
-	return &active->base;
+replace:
+	/*
+	 * Overwrite the previous active slot in the rbtree with last_active,
+	 * leaving last_active zeroed. If the previous slot is still active,
+	 * we must be careful as we now only expect to recieve one retire
+	 * callback not two, and so must undo the active counting for the
+	 * overwritten slot.
+	 */
+	if (i915_gem_active_isset(&active->base)) {
+		__list_del_entry(&active->base.link);
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1002,6 +1043,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rbtree_postorder_for_each_entry_safe(active, n,
 						     &vma->active, node) {
 			ret = i915_gem_active_retire(&active->base,
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c297b0a0dc47..f06d66377107 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@ struct i915_vma {
 
 	unsigned int active_count;
 	struct rb_root active;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (8 preceding siblings ...)
  2018-07-02 11:34 ` [PATCH 1/6] " Tvrtko Ursulin
@ 2018-07-04  8:52 ` Patchwork
  2018-07-04  8:55 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04  8:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2)
URL   : https://patchwork.freedesktop.org/series/45689/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
0f2a81bb4d94 drm/i915: Refactor export_fence() after i915_vma_move_to_active()
bdd079924fd6 drm/i915: Export i915_request_skip()
36825b164208 drm/i915: Start returning an error from i915_vma_move_to_active()
3dbdeaac3703 drm/i915: Move i915_vma_move_to_active() to i915_vma.c
bec581e58385 drm/i915: Track vma activity per fence.context, not per engine
e6be0d999f86 drm/i915: Track the last-active inside the i915_vma
-:83: WARNING:TYPO_SPELLING: 'recieve' may be misspelled - perhaps 'receive'?
#83: FILE: drivers/gpu/drm/i915/i915_vma.c:952:
+	 * we must be careful as we now only expect to recieve one retire

total: 0 errors, 1 warnings, 0 checks, 93 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (9 preceding siblings ...)
  2018-07-04  8:52 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2) Patchwork
@ 2018-07-04  8:55 ` Patchwork
  2018-07-04  9:07 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04  8:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2)
URL   : https://patchwork.freedesktop.org/series/45689/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Refactor export_fence() after i915_vma_move_to_active()
Okay!

Commit: drm/i915: Export i915_request_skip()
Okay!

Commit: drm/i915: Start returning an error from i915_vma_move_to_active()
Okay!

Commit: drm/i915: Move i915_vma_move_to_active() to i915_vma.c
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3661:16: warning: expression using sizeof(void)

Commit: drm/i915: Track vma activity per fence.context, not per engine
Okay!

Commit: drm/i915: Track the last-active inside the i915_vma
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (10 preceding siblings ...)
  2018-07-04  8:55 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-07-04  9:07 ` Patchwork
  2018-07-04  9:34 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3) Patchwork
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04  9:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2)
URL   : https://patchwork.freedesktop.org/series/45689/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4425 -> Patchwork_9518 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/45689/revisions/2/mbox/

== Known issues ==

  Here are the changes found in Patchwork_9518 that come from known issues:

  === IGT changes ===

    ==== Possible fixes ====

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
      fi-bxt-dsi:         INCOMPLETE (fdo#103927) -> PASS

    
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927


== Participating hosts (46 -> 41) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4425 -> Patchwork_9518

  CI_DRM_4425: 655f2c563a64861ef52008d968092a62644caf96 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4534: aeb3f4143572b81a907921ad9442858aafe1931e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9518: e6be0d999f86f9c9f372f8f41ffdabb305ab7d78 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e6be0d999f86 drm/i915: Track the last-active inside the i915_vma
bec581e58385 drm/i915: Track vma activity per fence.context, not per engine
3dbdeaac3703 drm/i915: Move i915_vma_move_to_active() to i915_vma.c
36825b164208 drm/i915: Start returning an error from i915_vma_move_to_active()
bdd079924fd6 drm/i915: Export i915_request_skip()
0f2a81bb4d94 drm/i915: Refactor export_fence() after i915_vma_move_to_active()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9518/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v3] drm/i915: Track vma activity per fence.context, not per engine
  2018-06-29 22:54 ` [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
  2018-07-03 17:28   ` Tvrtko Ursulin
@ 2018-07-04  9:13   ` Chris Wilson
  2018-07-04 11:19     ` Tvrtko Ursulin
  1 sibling, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-07-04  9:13 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to be able to use more flexible request
timelines that can hop between engines. From the vma pov, we can then
not rely on the binding of this request to an engine and so can not
ensure that different requests are ordered through a per-engine
timeline, and so we must track activity of all timelines. (We track
activity on the vma itself to prevent unbinding from HW before the HW
has finished accessing it.)

v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
index is fraught with aliasing of unsigned longs).
v3: s/lookup_active/active_instance/ because we can never agree on names

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |   5 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
 drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
 drivers/gpu/drm/i915/i915_request.h   |   1 +
 drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
 6 files changed, 100 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c6aa761ca085..216463fb3a1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	struct i915_vma *vma;
-	int i;
 
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(size > ggtt->vm.total);
@@ -2005,14 +2004,14 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	if (!vma)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], NULL);
 	init_request_active(&vma->last_fence, NULL);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
 	vma->private = ppgtt;
 
+	vma->active = RB_ROOT;
+
 	vma->size = size;
 	vma->fence_size = size;
 	vma->flags = I915_VMA_GGTT;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index df524c9cad40..8c81cf3aa182 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	int i;
-
 	err_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
+		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
 			   upper_32_bits(err->gtt_offset),
 			   lower_32_bits(err->gtt_offset),
 			   err->size,
 			   err->read_domains,
-			   err->write_domain);
-		for (i = 0; i < I915_NUM_ENGINES; i++)
-			err_printf(m, "%02x ", err->rseqno[i]);
-
-		err_printf(m, "] %02x", err->wseqno);
+			   err->write_domain,
+			   err->wseqno);
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
@@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	int i;
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
 	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
 	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 58910f1dc67c..f893a4e8b783 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -177,7 +177,7 @@ struct i915_gpu_state {
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
-		u32 rseqno[I915_NUM_ENGINES], wseqno;
+		u32 wseqno;
 		u64 gtt_offset;
 		u32 read_domains;
 		u32 write_domain;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a355a081485f..e1c9365dfefb 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -380,6 +380,7 @@ static inline void
 init_request_active(struct i915_gem_active *active,
 		    i915_gem_retire_fn retire)
 {
+	RCU_INIT_POINTER(active->request, NULL);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_gem_retire_noop;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index c608579d03e9..4a2bafffb231 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
+struct i915_vma_active {
+	struct i915_gem_active base;
+	struct i915_vma *vma;
+	struct rb_node node;
+	u64 timeline;
+};
+
 static void
-i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
+__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
 {
-	const unsigned int idx = rq->engine->id;
-	struct i915_vma *vma =
-		container_of(active, struct i915_vma, last_read[idx]);
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
-
-	i915_vma_clear_active(vma, idx);
-	if (i915_vma_is_active(vma))
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	if (--vma->active_count)
 		return;
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
 	}
 }
 
+static void
+i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	struct i915_vma_active *active =
+		container_of(base, typeof(*active), base);
+
+	__i915_vma_retire(active->vma, rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	struct rb_node *rb, **p;
-	int i;
 
 	/* The aliasing_ppgtt should never be used directly! */
 	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
@@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		init_request_active(&vma->last_read[i], i915_vma_retire);
+	vma->active = RB_ROOT;
+
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
 static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	int i;
+	struct i915_vma_active *iter, *n;
 
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
-		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
 	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
 
 	list_del(&vma->obj_link);
@@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
+	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
+		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
+		kfree(iter);
+	}
+
 	kmem_cache_free(i915->vmas, vma);
 }
 
@@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
 	reservation_object_unlock(resv);
 }
 
+static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
+{
+	struct i915_vma_active *active;
+	struct rb_node **p, *parent;
+
+	parent = NULL;
+	p = &vma->active.rb_node;
+	while (*p) {
+		parent = *p;
+
+		active = rb_entry(parent, struct i915_vma_active, node);
+		if (active->timeline == idx)
+			return &active->base;
+
+		if (active->timeline < idx)
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+
+	active = kmalloc(sizeof(*active), GFP_KERNEL);
+	if (unlikely(!active))
+		return ERR_PTR(-ENOMEM);
+
+	init_request_active(&active->base, i915_vma_retire);
+	active->vma = vma;
+	active->timeline = idx;
+
+	rb_link_node(&active->node, parent, p);
+	rb_insert_color(&active->node, &vma->active);
+
+	return &active->base;
+}
+
 int i915_vma_move_to_active(struct i915_vma *vma,
 			    struct i915_request *rq,
 			    unsigned int flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
-	const unsigned int idx = rq->engine->id;
+	struct i915_gem_active *active;
 
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
+	active = active_instance(vma, rq->fence.context);
+	if (IS_ERR(active))
+		return PTR_ERR(active);
+
 	/*
 	 * Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
@@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_vma_is_active(vma))
+	if (!i915_gem_active_isset(active) && !vma->active_count++) {
+		list_move_tail(&vma->vm_link, &vma->vm->active_list);
 		obj->active_count++;
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], rq);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
+	}
+	i915_gem_active_set(active, rq);
+	GEM_BUG_ON(!i915_vma_is_active(vma));
+	GEM_BUG_ON(!obj->active_count);
 
 	obj->write_domain = 0;
 	if (flags & EXEC_OBJECT_WRITE) {
@@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 
 int i915_vma_unbind(struct i915_vma *vma)
 {
-	unsigned long active;
 	int ret;
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
@@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
 	might_sleep();
-	active = i915_vma_get_active(vma);
-	if (active) {
-		int idx;
+	if (i915_vma_is_active(vma)) {
+		struct i915_vma_active *active, *n;
 
 		/*
 		 * When a closed VMA is retired, it is unbound - eek.
@@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
-		for_each_active(active, idx) {
-			ret = i915_gem_active_retire(&vma->last_read[idx],
+		rbtree_postorder_for_each_entry_safe(active, n,
+						     &vma->active, node) {
+			ret = i915_gem_active_retire(&active->base,
 						     &vma->vm->i915->drm.struct_mutex);
 			if (ret)
-				break;
-		}
-
-		if (!ret) {
-			ret = i915_gem_active_retire(&vma->last_fence,
-						     &vma->vm->i915->drm.struct_mutex);
+				goto unpin;
 		}
 
+		ret = i915_gem_active_retire(&vma->last_fence,
+					     &vma->vm->i915->drm.struct_mutex);
+unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index a218b689e418..c297b0a0dc47 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -26,6 +26,7 @@
 #define __I915_VMA_H__
 
 #include <linux/io-mapping.h>
+#include <linux/rbtree.h>
 
 #include <drm/drm_mm.h>
 
@@ -94,8 +95,8 @@ struct i915_vma {
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
 #define I915_VMA_GGTT_WRITE	BIT(12)
 
-	unsigned int active;
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	unsigned int active_count;
+	struct rb_root active;
 	struct i915_gem_active last_fence;
 
 	/**
@@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 
 void i915_vma_unpin_and_release(struct i915_vma **p_vma);
 
+static inline bool i915_vma_is_active(struct i915_vma *vma)
+{
+	return vma->active_count;
+}
+
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+					 struct i915_request *rq,
+					 unsigned int flags);
+
 static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
 {
 	return vma->flags & I915_VMA_GGTT;
@@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
-static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
-{
-	return vma->active;
-}
-
-static inline bool i915_vma_is_active(const struct i915_vma *vma)
-{
-	return i915_vma_get_active(vma);
-}
-
-static inline void i915_vma_set_active(struct i915_vma *vma,
-				       unsigned int engine)
-{
-	vma->active |= BIT(engine);
-}
-
-static inline void i915_vma_clear_active(struct i915_vma *vma,
-					 unsigned int engine)
-{
-	vma->active &= ~BIT(engine);
-}
-
-static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
-					      unsigned int engine)
-{
-	return vma->active & BIT(engine);
-}
-
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
-
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (11 preceding siblings ...)
  2018-07-04  9:07 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-07-04  9:34 ` Patchwork
  2018-07-04  9:37 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04  9:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
URL   : https://patchwork.freedesktop.org/series/45689/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
dd7709cafcb7 drm/i915: Refactor export_fence() after i915_vma_move_to_active()
6c424d1ea401 drm/i915: Export i915_request_skip()
03109608e569 drm/i915: Start returning an error from i915_vma_move_to_active()
5e86631283fa drm/i915: Move i915_vma_move_to_active() to i915_vma.c
daf3f8cfc6d5 drm/i915: Track vma activity per fence.context, not per engine
3a97493679be drm/i915: Track the last-active inside the i915_vma
-:83: WARNING:TYPO_SPELLING: 'recieve' may be misspelled - perhaps 'receive'?
#83: FILE: drivers/gpu/drm/i915/i915_vma.c:952:
+	 * we must be careful as we now only expect to recieve one retire

total: 0 errors, 1 warnings, 0 checks, 93 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (12 preceding siblings ...)
  2018-07-04  9:34 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3) Patchwork
@ 2018-07-04  9:37 ` Patchwork
  2018-07-04  9:49 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-07-04 10:43 ` ✓ Fi.CI.IGT: " Patchwork
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04  9:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
URL   : https://patchwork.freedesktop.org/series/45689/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Refactor export_fence() after i915_vma_move_to_active()
Okay!

Commit: drm/i915: Export i915_request_skip()
Okay!

Commit: drm/i915: Start returning an error from i915_vma_move_to_active()
Okay!

Commit: drm/i915: Move i915_vma_move_to_active() to i915_vma.c
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3664:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3661:16: warning: expression using sizeof(void)

Commit: drm/i915: Track vma activity per fence.context, not per engine
Okay!

Commit: drm/i915: Track the last-active inside the i915_vma
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-04  8:34   ` [PATCH v2] " Chris Wilson
@ 2018-07-04  9:39     ` Tvrtko Ursulin
  2018-07-04 11:34       ` Tvrtko Ursulin
  2018-07-05 11:38     ` Tvrtko Ursulin
  1 sibling, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-04  9:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/07/2018 09:34, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the rbtree if
> we need a second or more concurrent active trackers.
> 
> v2: Comments on how we overwrite any existing last_active cache.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>   2 files changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index cd94ffc7f079..33925e00f7e8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>   	__i915_vma_retire(active->vma, rq);
>   }
>   
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   
>   	vma->active = RB_ROOT;
>   
> +	init_request_active(&vma->last_active, i915_vma_last_retire);
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   {
>   	struct i915_vma_active *active;
>   	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	/*
> +	 * We track the most recently used timeline to skip a rbtree search
> +	 * for the common case, under typical loads we never need the rbtree
> +	 * at all. We can reuse the last_active slot if it is empty, that is
> +	 * after the previous activity has been retired, or if the active
> +	 * matches the current timeline.
> +	 */
> +	old = i915_gem_active_raw(&vma->last_active,
> +				  &vma->vm->i915->drm.struct_mutex);
> +	if (!old || old->fence.context == idx)
> +		goto out;
> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;
>   
>   	parent = NULL;
>   	p = &vma->active.rb_node;
> @@ -903,7 +926,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   
>   		active = rb_entry(parent, struct i915_vma_active, node);
>   		if (active->timeline == idx)
> -			return &active->base;
> +			goto replace;
>   
>   		if (active->timeline < idx)
>   			p = &parent->rb_right;
> @@ -922,7 +945,25 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   	rb_link_node(&active->node, parent, p);
>   	rb_insert_color(&active->node, &vma->active);
>   
> -	return &active->base;
> +replace:
> +	/*
> +	 * Overwrite the previous active slot in the rbtree with last_active,
> +	 * leaving last_active zeroed. If the previous slot is still active,
> +	 * we must be careful as we now only expect to recieve one retire

typo in receive

> +	 * callback not two, and so must undo the active counting for the
> +	 * overwritten slot.
> +	 */
> +	if (i915_gem_active_isset(&active->base)) {
> +		__list_del_entry(&active->base.link);
> +		vma->active_count--;
 > +		GEM_BUG_ON(!vma->active_count);

I still don't get this. The cache is exclusive, so when transferring a 
record from rbtree to last_active, why do we need to decrement the 
vma->active_count here? Don't get the part in the comment about two 
retires - do you really sometimes expect two - ie cache is not exclusive?

But the fact that lookup of a cached entry is a straight return, meaning 
vma->active_count is manipulated elsewhere, makes me think that messing 
with it on this path is avoidable as well.

Maybe the separation of duties between the callers and this function 
needs to be stronger.

Regards,

Tvrtko

> +	}
> +	GEM_BUG_ON(list_empty(&vma->last_active.link));
> +	list_replace_init(&vma->last_active.link, &active->base.link);
> +	active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> +	return &vma->last_active;
>   }
>   
>   int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1043,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> +		ret = i915_gem_active_retire(&vma->last_active,
> +					     &vma->vm->i915->drm.struct_mutex);
> +		if (ret)
> +			goto unpin;
> +
>   		rbtree_postorder_for_each_entry_safe(active, n,
>   						     &vma->active, node) {
>   			ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>   
>   	unsigned int active_count;
>   	struct rb_root active;
> +	struct i915_gem_active last_active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine
  2018-07-03 20:29     ` Chris Wilson
@ 2018-07-04  9:43       ` Tvrtko Ursulin
  2018-07-04  9:53         ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-04  9:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/07/2018 21:29, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-07-03 18:28:31)
>>
>> On 29/06/2018 23:54, Chris Wilson wrote:
>>> In the next patch, we will want to be able to use more flexible request
>>> timelines that can hop between engines. From the vma pov, we can then
>>> not rely on the binding of this request to an engine and so can not
>>> ensure that different requests are ordered through a per-engine
>>> timeline, and so we must track activity of all timelines. (We track
>>> activity on the vma itself to prevent unbinding from HW before the HW
>>> has finished accessing it.)
>>>
>>> v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
>>> index is fraught with aliasing of unsigned longs).

[snip]

>>> +struct i915_vma_active {
>>> +     struct i915_gem_active base;
>>> +     struct i915_vma *vma;
>>> +     struct rb_node node;
>>> +     u64 timeline;
>>
>> If my quick calculations are correct this is (8 + 16 + 8) + 8 + 20 + 8 =
>> 68 large - just unluckily over the 64-byte slab so at some point I think
>> it will warrant a dedicated slab to avoid wastage.
> 
> Hmm, isn't it 7 pointers + a u64.
> 
>   sizeof(i915_vma_active)=72
>   offsetofend(i915_vma_active.base)=32
>   offsetofend(i915_vma_active.vma)=40
>   offsetofend(i915_vma_active.node)=64
>   offsetofend(i915_vma_active.timeline)=72
> 
> Bah i915_gem_active is bigger than I remember.

So goes into the 96-byte bucket and waste is 24 bytes per entry. I 
thought there is only 128 bucket and 128 minus my incorrect 68 was much 
more. So OK, can leave it for later optimisation.

[snip]

>>>        /*
>>>         * Add a reference if we're newly entering the active list.
>>>         * The order in which we add operations to the retirement queue is
>>> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>>>         * add the active reference first and queue for it to be dropped
>>>         * *last*.
>>>         */
>>> -     if (!i915_vma_is_active(vma))
>>> +     if (!i915_gem_active_isset(active) && !vma->active_count++) {
>>
>> vma->active_count (which is i915_vma_is_active) check wouldn't be
>> enough? Can it be zero with active _set_?
> 
> No, in the next patch we can have active_count with vma->active unset.

Definitely then please make it so it is only what is needed for this 
patch, and change it in the next.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (13 preceding siblings ...)
  2018-07-04  9:37 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-07-04  9:49 ` Patchwork
  2018-07-04 10:43 ` ✓ Fi.CI.IGT: " Patchwork
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04  9:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
URL   : https://patchwork.freedesktop.org/series/45689/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4425 -> Patchwork_9519 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/45689/revisions/3/mbox/


== Changes ==

  No changes found


== Participating hosts (46 -> 41) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4425 -> Patchwork_9519

  CI_DRM_4425: 655f2c563a64861ef52008d968092a62644caf96 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4534: aeb3f4143572b81a907921ad9442858aafe1931e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9519: 3a97493679be323bbc5e625d82f308d7569e770d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

3a97493679be drm/i915: Track the last-active inside the i915_vma
daf3f8cfc6d5 drm/i915: Track vma activity per fence.context, not per engine
5e86631283fa drm/i915: Move i915_vma_move_to_active() to i915_vma.c
03109608e569 drm/i915: Start returning an error from i915_vma_move_to_active()
6c424d1ea401 drm/i915: Export i915_request_skip()
dd7709cafcb7 drm/i915: Refactor export_fence() after i915_vma_move_to_active()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9519/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine
  2018-07-04  9:43       ` Tvrtko Ursulin
@ 2018-07-04  9:53         ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2018-07-04  9:53 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-07-04 10:43:07)
> 
> On 03/07/2018 21:29, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-07-03 18:28:31)
> >>
> >> On 29/06/2018 23:54, Chris Wilson wrote:
> >>>        /*
> >>>         * Add a reference if we're newly entering the active list.
> >>>         * The order in which we add operations to the retirement queue is
> >>> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
> >>>         * add the active reference first and queue for it to be dropped
> >>>         * *last*.
> >>>         */
> >>> -     if (!i915_vma_is_active(vma))
> >>> +     if (!i915_gem_active_isset(active) && !vma->active_count++) {
> >>
> >> vma->active_count (which is i915_vma_is_active) check wouldn't be
> >> enough? Can it be zero with active _set_?
> > 
> > No, in the next patch we can have active_count with vma->active unset.
> 
> Definitely then please make it so it is only what is needed for this 
> patch, and change it in the next.

It is. active_count == the number of retirement callbacks we will
receive, and so must not be incremented if we have already accounted for
this one. It is expressing the same old rule, having given up the bitmask and
knowing the state of each i915_gem_active_isset() simultaneously.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
  2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
                   ` (14 preceding siblings ...)
  2018-07-04  9:49 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-07-04 10:43 ` Patchwork
  15 siblings, 0 replies; 40+ messages in thread
From: Patchwork @ 2018-07-04 10:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3)
URL   : https://patchwork.freedesktop.org/series/45689/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4425_full -> Patchwork_9519_full =

== Summary - WARNING ==

  Minor unknown changes coming with Patchwork_9519_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9519_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9519_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_exec_schedule@deep-bsd2:
      shard-kbl:          PASS -> SKIP

    igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-shrfb-draw-mmap-wc:
      shard-hsw:          PASS -> SKIP

    igt@kms_frontbuffer_tracking@fbc-rgb565-draw-pwrite:
      shard-snb:          SKIP -> PASS +1

    
== Known issues ==

  Here are the changes found in Patchwork_9519_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@gem_ppgtt@blt-vs-render-ctx0:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665, fdo#106023)

    igt@kms_flip_tiling@flip-to-y-tiled:
      shard-glk:          PASS -> FAIL (fdo#104724, fdo#103822)

    igt@testdisplay:
      shard-glk:          NOTRUN -> INCOMPLETE (k.org#198133, fdo#103359)

    
    ==== Possible fixes ====

    igt@kms_setmode@basic:
      shard-apl:          FAIL (fdo#99912) -> PASS

    
    ==== Warnings ====

    igt@drv_selftest@live_gtt:
      shard-kbl:          INCOMPLETE (fdo#103665) -> FAIL (fdo#105347)
      shard-glk:          INCOMPLETE (k.org#198133, fdo#103359) -> FAIL (fdo#105347)

    
  fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822
  fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724
  fdo#105347 https://bugs.freedesktop.org/show_bug.cgi?id=105347
  fdo#106023 https://bugs.freedesktop.org/show_bug.cgi?id=106023
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
  k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * Linux: CI_DRM_4425 -> Patchwork_9519

  CI_DRM_4425: 655f2c563a64861ef52008d968092a62644caf96 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4534: aeb3f4143572b81a907921ad9442858aafe1931e @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9519: 3a97493679be323bbc5e625d82f308d7569e770d @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9519/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v3] drm/i915: Track vma activity per fence.context, not per engine
  2018-07-04  9:13   ` [PATCH v3] " Chris Wilson
@ 2018-07-04 11:19     ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-04 11:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/07/2018 10:13, Chris Wilson wrote:
> In the next patch, we will want to be able to use more flexible request
> timelines that can hop between engines. From the vma pov, we can then
> not rely on the binding of this request to an engine and so can not
> ensure that different requests are ordered through a per-engine
> timeline, and so we must track activity of all timelines. (We track
> activity on the vma itself to prevent unbinding from HW before the HW
> has finished accessing it.)
> 
> v2: Switch to a rbtree for 32b safety (since using u64 as a radixtree
> index is fraught with aliasing of unsigned longs).
> v3: s/lookup_active/active_instance/ because we can never agree on names
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c   |   5 +-
>   drivers/gpu/drm/i915/i915_gpu_error.c |  14 +---
>   drivers/gpu/drm/i915/i915_gpu_error.h |   2 +-
>   drivers/gpu/drm/i915/i915_request.h   |   1 +
>   drivers/gpu/drm/i915/i915_vma.c       | 112 +++++++++++++++++++-------
>   drivers/gpu/drm/i915/i915_vma.h       |  46 +++--------
>   6 files changed, 100 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c6aa761ca085..216463fb3a1c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1996,7 +1996,6 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
>   	struct i915_ggtt *ggtt = &i915->ggtt;
>   	struct i915_vma *vma;
> -	int i;
>   
>   	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
>   	GEM_BUG_ON(size > ggtt->vm.total);
> @@ -2005,14 +2004,14 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>   	if (!vma)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], NULL);
>   	init_request_active(&vma->last_fence, NULL);
>   
>   	vma->vm = &ggtt->vm;
>   	vma->ops = &pd_vma_ops;
>   	vma->private = ppgtt;
>   
> +	vma->active = RB_ROOT;
> +
>   	vma->size = size;
>   	vma->fence_size = size;
>   	vma->flags = I915_VMA_GGTT;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index df524c9cad40..8c81cf3aa182 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -335,21 +335,16 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>   				struct drm_i915_error_buffer *err,
>   				int count)
>   {
> -	int i;
> -
>   	err_printf(m, "%s [%d]:\n", name, count);
>   
>   	while (count--) {
> -		err_printf(m, "    %08x_%08x %8u %02x %02x [ ",
> +		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
>   			   upper_32_bits(err->gtt_offset),
>   			   lower_32_bits(err->gtt_offset),
>   			   err->size,
>   			   err->read_domains,
> -			   err->write_domain);
> -		for (i = 0; i < I915_NUM_ENGINES; i++)
> -			err_printf(m, "%02x ", err->rseqno[i]);
> -
> -		err_printf(m, "] %02x", err->wseqno);
> +			   err->write_domain,
> +			   err->wseqno);
>   		err_puts(m, tiling_flag(err->tiling));
>   		err_puts(m, dirty_flag(err->dirty));
>   		err_puts(m, purgeable_flag(err->purgeable));
> @@ -1021,13 +1016,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   		       struct i915_vma *vma)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	int i;
>   
>   	err->size = obj->base.size;
>   	err->name = obj->base.name;
>   
> -	for (i = 0; i < I915_NUM_ENGINES; i++)
> -		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
>   	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
>   	err->engine = __active_get_engine_id(&obj->frontbuffer_write);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 58910f1dc67c..f893a4e8b783 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -177,7 +177,7 @@ struct i915_gpu_state {
>   	struct drm_i915_error_buffer {
>   		u32 size;
>   		u32 name;
> -		u32 rseqno[I915_NUM_ENGINES], wseqno;
> +		u32 wseqno;
>   		u64 gtt_offset;
>   		u32 read_domains;
>   		u32 write_domain;
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index a355a081485f..e1c9365dfefb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -380,6 +380,7 @@ static inline void
>   init_request_active(struct i915_gem_active *active,
>   		    i915_gem_retire_fn retire)
>   {
> +	RCU_INIT_POINTER(active->request, NULL);
>   	INIT_LIST_HEAD(&active->link);
>   	active->retire = retire ?: i915_gem_retire_noop;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index c608579d03e9..4a2bafffb231 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -63,18 +63,20 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
>   
>   #endif
>   
> +struct i915_vma_active {
> +	struct i915_gem_active base;
> +	struct i915_vma *vma;
> +	struct rb_node node;
> +	u64 timeline;
> +};
> +
>   static void
> -i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
> +__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
>   {
> -	const unsigned int idx = rq->engine->id;
> -	struct i915_vma *vma =
> -		container_of(active, struct i915_vma, last_read[idx]);
>   	struct drm_i915_gem_object *obj = vma->obj;
>   
> -	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
> -
> -	i915_vma_clear_active(vma, idx);
> -	if (i915_vma_is_active(vma))
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	if (--vma->active_count)
>   		return;
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> @@ -108,6 +110,15 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq)
>   	}
>   }
>   
> +static void
> +i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	struct i915_vma_active *active =
> +		container_of(base, typeof(*active), base);
> +
> +	__i915_vma_retire(active->vma, rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -115,7 +126,6 @@ vma_create(struct drm_i915_gem_object *obj,
>   {
>   	struct i915_vma *vma;
>   	struct rb_node *rb, **p;
> -	int i;
>   
>   	/* The aliasing_ppgtt should never be used directly! */
>   	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
> @@ -124,8 +134,8 @@ vma_create(struct drm_i915_gem_object *obj,
>   	if (vma == NULL)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		init_request_active(&vma->last_read[i], i915_vma_retire);
> +	vma->active = RB_ROOT;
> +
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -778,13 +788,11 @@ void i915_vma_reopen(struct i915_vma *vma)
>   static void __i915_vma_destroy(struct i915_vma *vma)
>   {
>   	struct drm_i915_private *i915 = vma->vm->i915;
> -	int i;
> +	struct i915_vma_active *iter, *n;
>   
>   	GEM_BUG_ON(vma->node.allocated);
>   	GEM_BUG_ON(vma->fence);
>   
> -	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
> -		GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i]));
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
>   	list_del(&vma->obj_link);
> @@ -795,6 +803,11 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   	if (!i915_vma_is_ggtt(vma))
>   		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
>   
> +	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> +		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> +		kfree(iter);
> +	}
> +
>   	kmem_cache_free(i915->vmas, vma);
>   }
>   
> @@ -878,16 +891,54 @@ static void export_fence(struct i915_vma *vma,
>   	reservation_object_unlock(resv);
>   }
>   
> +static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
> +{
> +	struct i915_vma_active *active;
> +	struct rb_node **p, *parent;
> +
> +	parent = NULL;
> +	p = &vma->active.rb_node;
> +	while (*p) {
> +		parent = *p;
> +
> +		active = rb_entry(parent, struct i915_vma_active, node);
> +		if (active->timeline == idx)
> +			return &active->base;
> +
> +		if (active->timeline < idx)
> +			p = &parent->rb_right;
> +		else
> +			p = &parent->rb_left;
> +	}
> +
> +	active = kmalloc(sizeof(*active), GFP_KERNEL);
> +	if (unlikely(!active))
> +		return ERR_PTR(-ENOMEM);
> +
> +	init_request_active(&active->base, i915_vma_retire);
> +	active->vma = vma;
> +	active->timeline = idx;
> +
> +	rb_link_node(&active->node, parent, p);
> +	rb_insert_color(&active->node, &vma->active);
> +
> +	return &active->base;
> +}
> +
>   int i915_vma_move_to_active(struct i915_vma *vma,
>   			    struct i915_request *rq,
>   			    unsigned int flags)
>   {
>   	struct drm_i915_gem_object *obj = vma->obj;
> -	const unsigned int idx = rq->engine->id;
> +	struct i915_gem_active *active;
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> +	active = active_instance(vma, rq->fence.context);
> +	if (IS_ERR(active))
> +		return PTR_ERR(active);
> +
>   	/*
>   	 * Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> @@ -896,11 +947,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_vma_is_active(vma))
> +	if (!i915_gem_active_isset(active) && !vma->active_count++) {
> +		list_move_tail(&vma->vm_link, &vma->vm->active_list);
>   		obj->active_count++;
> -	i915_vma_set_active(vma, idx);
> -	i915_gem_active_set(&vma->last_read[idx], rq);
> -	list_move_tail(&vma->vm_link, &vma->vm->active_list);
> +	}
> +	i915_gem_active_set(active, rq);
> +	GEM_BUG_ON(!i915_vma_is_active(vma));
> +	GEM_BUG_ON(!obj->active_count);
>   
>   	obj->write_domain = 0;
>   	if (flags & EXEC_OBJECT_WRITE) {
> @@ -922,7 +975,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>   
>   int i915_vma_unbind(struct i915_vma *vma)
>   {
> -	unsigned long active;
>   	int ret;
>   
>   	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> @@ -932,9 +984,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	 * have side-effects such as unpinning or even unbinding this vma.
>   	 */
>   	might_sleep();
> -	active = i915_vma_get_active(vma);
> -	if (active) {
> -		int idx;
> +	if (i915_vma_is_active(vma)) {
> +		struct i915_vma_active *active, *n;
>   
>   		/*
>   		 * When a closed VMA is retired, it is unbound - eek.
> @@ -951,18 +1002,17 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> -		for_each_active(active, idx) {
> -			ret = i915_gem_active_retire(&vma->last_read[idx],
> +		rbtree_postorder_for_each_entry_safe(active, n,
> +						     &vma->active, node) {
> +			ret = i915_gem_active_retire(&active->base,
>   						     &vma->vm->i915->drm.struct_mutex);
>   			if (ret)
> -				break;
> -		}
> -
> -		if (!ret) {
> -			ret = i915_gem_active_retire(&vma->last_fence,
> -						     &vma->vm->i915->drm.struct_mutex);
> +				goto unpin;
>   		}
>   
> +		ret = i915_gem_active_retire(&vma->last_fence,
> +					     &vma->vm->i915->drm.struct_mutex);
> +unpin:
>   		__i915_vma_unpin(vma);
>   		if (ret)
>   			return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index a218b689e418..c297b0a0dc47 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -26,6 +26,7 @@
>   #define __I915_VMA_H__
>   
>   #include <linux/io-mapping.h>
> +#include <linux/rbtree.h>
>   
>   #include <drm/drm_mm.h>
>   
> @@ -94,8 +95,8 @@ struct i915_vma {
>   #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
>   #define I915_VMA_GGTT_WRITE	BIT(12)
>   
> -	unsigned int active;
> -	struct i915_gem_active last_read[I915_NUM_ENGINES];
> +	unsigned int active_count;
> +	struct rb_root active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> @@ -138,6 +139,15 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma);
>   
> +static inline bool i915_vma_is_active(struct i915_vma *vma)
> +{
> +	return vma->active_count;
> +}
> +
> +int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> +					 struct i915_request *rq,
> +					 unsigned int flags);
> +
>   static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
>   {
>   	return vma->flags & I915_VMA_GGTT;
> @@ -187,38 +197,6 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
>   	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>   }
>   
> -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
> -{
> -	return vma->active;
> -}
> -
> -static inline bool i915_vma_is_active(const struct i915_vma *vma)
> -{
> -	return i915_vma_get_active(vma);
> -}
> -
> -static inline void i915_vma_set_active(struct i915_vma *vma,
> -				       unsigned int engine)
> -{
> -	vma->active |= BIT(engine);
> -}
> -
> -static inline void i915_vma_clear_active(struct i915_vma *vma,
> -					 unsigned int engine)
> -{
> -	vma->active &= ~BIT(engine);
> -}
> -
> -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
> -					      unsigned int engine)
> -{
> -	return vma->active & BIT(engine);
> -}
> -
> -int __must_check i915_vma_move_to_active(struct i915_vma *vma,
> -					 struct i915_request *rq,
> -					 unsigned int flags);
> -
>   static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-04  9:39     ` Tvrtko Ursulin
@ 2018-07-04 11:34       ` Tvrtko Ursulin
  2018-07-04 11:47         ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-04 11:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/07/2018 10:39, Tvrtko Ursulin wrote:
> 
> On 04/07/2018 09:34, Chris Wilson wrote:
>> Using a VMA on more than one timeline concurrently is the exception
>> rather than the rule (using it concurrently on multiple engines). As we
>> expect to only use one active tracker, store the most recently used
>> tracker inside the i915_vma itself and only fallback to the rbtree if
>> we need a second or more concurrent active trackers.
>>
>> v2: Comments on how we overwrite any existing last_active cache.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
>>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>>   2 files changed, 49 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_vma.c 
>> b/drivers/gpu/drm/i915/i915_vma.c
>> index cd94ffc7f079..33925e00f7e8 100644
>> --- a/drivers/gpu/drm/i915/i915_vma.c
>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, 
>> struct i915_request *rq)
>>       __i915_vma_retire(active->vma, rq);
>>   }
>> +static void
>> +i915_vma_last_retire(struct i915_gem_active *base, struct 
>> i915_request *rq)
>> +{
>> +    __i915_vma_retire(container_of(base, struct i915_vma, 
>> last_active), rq);
>> +}
>> +
>>   static struct i915_vma *
>>   vma_create(struct drm_i915_gem_object *obj,
>>          struct i915_address_space *vm,
>> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>>       vma->active = RB_ROOT;
>> +    init_request_active(&vma->last_active, i915_vma_last_retire);
>>       init_request_active(&vma->last_fence, NULL);
>>       vma->vm = vm;
>>       vma->ops = &vm->vma_ops;
>> @@ -895,6 +902,22 @@ static struct i915_gem_active 
>> *lookup_active(struct i915_vma *vma, u64 idx)
>>   {
>>       struct i915_vma_active *active;
>>       struct rb_node **p, *parent;
>> +    struct i915_request *old;
>> +
>> +    /*
>> +     * We track the most recently used timeline to skip a rbtree search
>> +     * for the common case, under typical loads we never need the rbtree
>> +     * at all. We can reuse the last_active slot if it is empty, that is
>> +     * after the previous activity has been retired, or if the active
>> +     * matches the current timeline.
>> +     */
>> +    old = i915_gem_active_raw(&vma->last_active,
>> +                  &vma->vm->i915->drm.struct_mutex);
>> +    if (!old || old->fence.context == idx)
>> +        goto out;
>> +
>> +    /* Move the currently active fence into the rbtree */
>> +    idx = old->fence.context;
>>       parent = NULL;
>>       p = &vma->active.rb_node;
>> @@ -903,7 +926,7 @@ static struct i915_gem_active 
>> *lookup_active(struct i915_vma *vma, u64 idx)
>>           active = rb_entry(parent, struct i915_vma_active, node);
>>           if (active->timeline == idx)
>> -            return &active->base;
>> +            goto replace;
>>           if (active->timeline < idx)
>>               p = &parent->rb_right;
>> @@ -922,7 +945,25 @@ static struct i915_gem_active 
>> *lookup_active(struct i915_vma *vma, u64 idx)
>>       rb_link_node(&active->node, parent, p);
>>       rb_insert_color(&active->node, &vma->active);
>> -    return &active->base;
>> +replace:
>> +    /*
>> +     * Overwrite the previous active slot in the rbtree with 
>> last_active,
>> +     * leaving last_active zeroed. If the previous slot is still active,
>> +     * we must be careful as we now only expect to recieve one retire
> 
> typo in receive
> 
>> +     * callback not two, and so much undo the active counting for the
>> +     * overwritten slot.
>> +     */
>> +    if (i915_gem_active_isset(&active->base)) {
>> +        __list_del_entry(&active->base.link);
>> +        vma->active_count--;
>  > +        GEM_BUG_ON(!vma->active_count);
> 
> I still don't get this. The cache is exclusive, so when transferring a 
> record from rbtree to last_active, why do we need to decrement the 
> vma->active_count here? Don't get the part in the comment about two 
> retires - do you really sometimes expect two - ie cache is not exclusive?
> 
> But the fact that lookup of a cached entry is a straight return, meaning 
> vma->active_count is manipulated elsewhere, makes me think it is 
> avoidable messing with it on this path as well.
> 
> Maybe the separation of duties between the callers and this function 
> needs to be stronger.

Hmm, or is your cache actually inclusive? I don't see any rbtree 
manipulation on migration to and from last_active/rbtree...

And since rbtree lookup is always for the last_active context id, you 
would otherwise never hit the "goto replace" path.

How do you ever look up an id which is not cached in last_active then?

I am thoroughly confused now..

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-04 11:34       ` Tvrtko Ursulin
@ 2018-07-04 11:47         ` Chris Wilson
  2018-07-04 12:30           ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-07-04 11:47 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-07-04 12:34:04)
> 
> On 04/07/2018 10:39, Tvrtko Ursulin wrote:
> > 
> > On 04/07/2018 09:34, Chris Wilson wrote:
> >> Using a VMA on more than one timeline concurrently is the exception
> >> rather than the rule (using it concurrently on multiple engines). As we
> >> expect to only use one active tracker, store the most recently used
> >> tracker inside the i915_vma itself and only fallback to the rbtree if
> >> we need a second or more concurrent active trackers.
> >>
> >> v2: Comments on how we overwrite any existing last_active cache.
> >>
> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
> >>   drivers/gpu/drm/i915/i915_vma.h |  1 +
> >>   2 files changed, 49 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_vma.c 
> >> b/drivers/gpu/drm/i915/i915_vma.c
> >> index cd94ffc7f079..33925e00f7e8 100644
> >> --- a/drivers/gpu/drm/i915/i915_vma.c
> >> +++ b/drivers/gpu/drm/i915/i915_vma.c
> >> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, 
> >> struct i915_request *rq)
> >>       __i915_vma_retire(active->vma, rq);
> >>   }
> >> +static void
> >> +i915_vma_last_retire(struct i915_gem_active *base, struct 
> >> i915_request *rq)
> >> +{
> >> +    __i915_vma_retire(container_of(base, struct i915_vma, 
> >> last_active), rq);
> >> +}
> >> +
> >>   static struct i915_vma *
> >>   vma_create(struct drm_i915_gem_object *obj,
> >>          struct i915_address_space *vm,
> >> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
> >>       vma->active = RB_ROOT;
> >> +    init_request_active(&vma->last_active, i915_vma_last_retire);
> >>       init_request_active(&vma->last_fence, NULL);
> >>       vma->vm = vm;
> >>       vma->ops = &vm->vma_ops;
> >> @@ -895,6 +902,22 @@ static struct i915_gem_active 
> >> *lookup_active(struct i915_vma *vma, u64 idx)
> >>   {
> >>       struct i915_vma_active *active;
> >>       struct rb_node **p, *parent;
> >> +    struct i915_request *old;
> >> +
> >> +    /*
> >> +     * We track the most recently used timeline to skip a rbtree search
> >> +     * for the common case, under typical loads we never need the rbtree
> >> +     * at all. We can reuse the last_active slot if it is empty, that is
> >> +     * after the previous activity has been retired, or if the active
> >> +     * matches the current timeline.
> >> +     */
> >> +    old = i915_gem_active_raw(&vma->last_active,
> >> +                  &vma->vm->i915->drm.struct_mutex);
> >> +    if (!old || old->fence.context == idx)
> >> +        goto out;
> >> +
> >> +    /* Move the currently active fence into the rbtree */
> >> +    idx = old->fence.context;
> >>       parent = NULL;
> >>       p = &vma->active.rb_node;
> >> @@ -903,7 +926,7 @@ static struct i915_gem_active 
> >> *lookup_active(struct i915_vma *vma, u64 idx)
> >>           active = rb_entry(parent, struct i915_vma_active, node);
> >>           if (active->timeline == idx)
> >> -            return &active->base;
> >> +            goto replace;
> >>           if (active->timeline < idx)
> >>               p = &parent->rb_right;
> >> @@ -922,7 +945,25 @@ static struct i915_gem_active 
> >> *lookup_active(struct i915_vma *vma, u64 idx)
> >>       rb_link_node(&active->node, parent, p);
> >>       rb_insert_color(&active->node, &vma->active);
> >> -    return &active->base;
> >> +replace:
> >> +    /*
> >> +     * Overwrite the previous active slot in the rbtree with 
> >> last_active,
> >> +     * leaving last_active zeroed. If the previous slot is still active,
> >> +     * we must be careful as we now only expect to recieve one retire
> > 
> > typo in receive
> > 
> >> +     * callback not two, and so much undo the active counting for the
> >> +     * overwritten slot.
> >> +     */
> >> +    if (i915_gem_active_isset(&active->base)) {
> >> +        __list_del_entry(&active->base.link);
> >> +        vma->active_count--;
> >  > +        GEM_BUG_ON(!vma->active_count);
> > 
> > I still don't get this. The cache is exclusive, so when transferring a 
> > record from rbtree to last_active, why do we need to decrement the 
> > vma->active_count here? Don't get the part in the comment about two 
> > retires - do you really sometimes expect two - ie cache is not exclusive?
> > 
> > But the fact that lookup of a cached entry is a straight return, meaning 
> > vma->active_count is manipulated elsewhere, makes me think it is 
> > avoidable messing with it on this path as well.
> > 
> > Maybe the separation of duties between the callers and this function 
> > needs to be stronger.
> 
> Hmm or is your cache actually inclusive? Don't see no rbtree 
> manipulation on migration to and from last_active/rbtree..

Both. Inclusive in the sense that both last_active and its timeline slot
in the rbtree may be active tracking different requests and so receive
retirement callbacks independently. Exclusive in that we don't store
last_active in the cache slot and in the rbtree.
 
> And since rbtree lookup is always for the last_active context id, you 
> would otherwise never hit the the "goto replace" path.
> 
> How do you ever look up an id which is not cached in last_active then?

We don't. We only lookup on evicting a still active request from
last_active. The MRU recent request always goes into last_active.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-04 11:47         ` Chris Wilson
@ 2018-07-04 12:30           ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-04 12:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/07/2018 12:47, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-07-04 12:34:04)
>>
>> On 04/07/2018 10:39, Tvrtko Ursulin wrote:
>>>
>>> On 04/07/2018 09:34, Chris Wilson wrote:
>>>> Using a VMA on more than one timeline concurrently is the exception
>>>> rather than the rule (using it concurrently on multiple engines). As we
>>>> expect to only use one active tracker, store the most recently used
>>>> tracker inside the i915_vma itself and only fallback to the rbtree if
>>>> we need a second or more concurrent active trackers.
>>>>
>>>> v2: Comments on how we overwrite any existing last_active cache.
>>>>
>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
>>>>    drivers/gpu/drm/i915/i915_vma.h |  1 +
>>>>    2 files changed, 49 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c
>>>> b/drivers/gpu/drm/i915/i915_vma.c
>>>> index cd94ffc7f079..33925e00f7e8 100644
>>>> --- a/drivers/gpu/drm/i915/i915_vma.c
>>>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>>>> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base,
>>>> struct i915_request *rq)
>>>>        __i915_vma_retire(active->vma, rq);
>>>>    }
>>>> +static void
>>>> +i915_vma_last_retire(struct i915_gem_active *base, struct
>>>> i915_request *rq)
>>>> +{
>>>> +    __i915_vma_retire(container_of(base, struct i915_vma,
>>>> last_active), rq);
>>>> +}
>>>> +
>>>>    static struct i915_vma *
>>>>    vma_create(struct drm_i915_gem_object *obj,
>>>>           struct i915_address_space *vm,
>>>> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>>>>        vma->active = RB_ROOT;
>>>> +    init_request_active(&vma->last_active, i915_vma_last_retire);
>>>>        init_request_active(&vma->last_fence, NULL);
>>>>        vma->vm = vm;
>>>>        vma->ops = &vm->vma_ops;
>>>> @@ -895,6 +902,22 @@ static struct i915_gem_active
>>>> *lookup_active(struct i915_vma *vma, u64 idx)
>>>>    {
>>>>        struct i915_vma_active *active;
>>>>        struct rb_node **p, *parent;
>>>> +    struct i915_request *old;
>>>> +
>>>> +    /*
>>>> +     * We track the most recently used timeline to skip a rbtree search
>>>> +     * for the common case, under typical loads we never need the rbtree
>>>> +     * at all. We can reuse the last_active slot if it is empty, that is
>>>> +     * after the previous activity has been retired, or if the active
>>>> +     * matches the current timeline.
>>>> +     */
>>>> +    old = i915_gem_active_raw(&vma->last_active,
>>>> +                  &vma->vm->i915->drm.struct_mutex);
>>>> +    if (!old || old->fence.context == idx)
>>>> +        goto out;
>>>> +
>>>> +    /* Move the currently active fence into the rbtree */
>>>> +    idx = old->fence.context;
>>>>        parent = NULL;
>>>>        p = &vma->active.rb_node;
>>>> @@ -903,7 +926,7 @@ static struct i915_gem_active
>>>> *lookup_active(struct i915_vma *vma, u64 idx)
>>>>            active = rb_entry(parent, struct i915_vma_active, node);
>>>>            if (active->timeline == idx)
>>>> -            return &active->base;
>>>> +            goto replace;
>>>>            if (active->timeline < idx)
>>>>                p = &parent->rb_right;
>>>> @@ -922,7 +945,25 @@ static struct i915_gem_active
>>>> *lookup_active(struct i915_vma *vma, u64 idx)
>>>>        rb_link_node(&active->node, parent, p);
>>>>        rb_insert_color(&active->node, &vma->active);
>>>> -    return &active->base;
>>>> +replace:
>>>> +    /*
>>>> +     * Overwrite the previous active slot in the rbtree with
>>>> last_active,
>>>> +     * leaving last_active zeroed. If the previous slot is still active,
>>>> +     * we must be careful as we now only expect to recieve one retire
>>>
>>> typo in receive
>>>
>>>> +     * callback not two, and so much undo the active counting for the
>>>> +     * overwritten slot.
>>>> +     */
>>>> +    if (i915_gem_active_isset(&active->base)) {
>>>> +        __list_del_entry(&active->base.link);
>>>> +        vma->active_count--;
>>>   > +        GEM_BUG_ON(!vma->active_count);
>>>
>>> I still don't get this. The cache is exclusive, so when transferring a
>>> record from rbtree to last_active, why do we need to decrement the
>>> vma->active_count here? Don't get the part in the comment about two
>>> retires - do you really sometimes expect two - ie cache is not exclusive?
>>>
>>> But the fact that lookup of a cached entry is a straight return, meaning
>>> vma->active_count is manipulated elsewhere, makes me think it is
>>> avoidable messing with it on this path as well.
>>>
>>> Maybe the separation of duties between the callers and this function
>>> needs to be stronger.
>>
>> Hmm or is your cache actually inclusive? Don't see no rbtree
>> manipulation on migration to and from last_active/rbtree..
> 
> Both. Inclusive in the sense that both last_active and its timeline slot
> in the rbtree may be active tracking different requests and so receive
> retirement callbacks independently. Exclusive in that we don't store
> last_active in the cache slot and in the rbtree.

I don't know how to reconcile between two sentences (statements). They 
seem in contradiction. :(
>> And since rbtree lookup is always for the last_active context id, you
>> would otherwise never hit the the "goto replace" path.
>>
>> How do you ever look up an id which is not cached in last_active then?
> 
> We don't. We only lookup on evicting a still active request from
> last_active. The MRU recent request always goes into last_active.

Sorry lost, another day maybe.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-04  8:34   ` [PATCH v2] " Chris Wilson
  2018-07-04  9:39     ` Tvrtko Ursulin
@ 2018-07-05 11:38     ` Tvrtko Ursulin
  2018-07-05 12:02       ` Chris Wilson
  1 sibling, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-05 11:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/07/2018 09:34, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the rbtree if
> we need a second or more concurrent active trackers.
> 
> v2: Comments on how we overwrite any existing last_active cache.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>   2 files changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index cd94ffc7f079..33925e00f7e8 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>   	__i915_vma_retire(active->vma, rq);
>   }
>   
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   
>   	vma->active = RB_ROOT;
>   
> +	init_request_active(&vma->last_active, i915_vma_last_retire);
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   {
>   	struct i915_vma_active *active;
>   	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	/*
> +	 * We track the most recently used timeline to skip a rbtree search
> +	 * for the common case, under typical loads we never need the rbtree
> +	 * at all. We can reuse the last_active slot if it is empty, that is
> +	 * after the previous activity has been retired, or if the active
> +	 * matches the current timeline.
> +	 */
> +	old = i915_gem_active_raw(&vma->last_active,
> +				  &vma->vm->i915->drm.struct_mutex);
> +	if (!old || old->fence.context == idx)
> +		goto out;

Is the situation that retire can be out of order relative to 
move_to_active? In other words, last_active can retire before the rbtree 
record, and so the following new move_to_active will find last_active 
empty and so could create a double entry for the same timeline?

Avoiding that would defeat the caching, unless when last_active is 
available we also check the tree, *if* the vma->active_count > 0?

That way we avoid creating duplicate entries.

But would still need to pull out this tree entry into last_active after 
the fact.

Regards,

Tvrtko

> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;
>   
>   	parent = NULL;
>   	p = &vma->active.rb_node;
> @@ -903,7 +926,7 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   
>   		active = rb_entry(parent, struct i915_vma_active, node);
>   		if (active->timeline == idx)
> -			return &active->base;
> +			goto replace;
>   
>   		if (active->timeline < idx)
>   			p = &parent->rb_right;
> @@ -922,7 +945,25 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>   	rb_link_node(&active->node, parent, p);
>   	rb_insert_color(&active->node, &vma->active);
>   
> -	return &active->base;
> +replace:
> +	/*
> +	 * Overwrite the previous active slot in the rbtree with last_active,
> +	 * leaving last_active zeroed. If the previous slot is still active,
> +	 * we must be careful as we now only expect to recieve one retire
> +	 * callback not two, and so much undo the active counting for the
> +	 * overwritten slot.
> +	 */
> +	if (i915_gem_active_isset(&active->base)) {
> +		__list_del_entry(&active->base.link);
> +		vma->active_count--;
> +		GEM_BUG_ON(!vma->active_count);
> +	}
> +	GEM_BUG_ON(list_empty(&vma->last_active.link));
> +	list_replace_init(&vma->last_active.link, &active->base.link);
> +	active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> +	return &vma->last_active;
>   }
>   
>   int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1043,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> +		ret = i915_gem_active_retire(&vma->last_active,
> +					     &vma->vm->i915->drm.struct_mutex);
> +		if (ret)
> +			goto unpin;
> +
>   		rbtree_postorder_for_each_entry_safe(active, n,
>   						     &vma->active, node) {
>   			ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>   
>   	unsigned int active_count;
>   	struct rb_root active;
> +	struct i915_gem_active last_active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-05 11:38     ` Tvrtko Ursulin
@ 2018-07-05 12:02       ` Chris Wilson
  2018-07-05 12:29         ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-07-05 12:02 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-07-05 12:38:46)
> 
> On 04/07/2018 09:34, Chris Wilson wrote:
> > Using a VMA on more than one timeline concurrently is the exception
> > rather than the rule (using it concurrently on multiple engines). As we
> > expect to only use one active tracker, store the most recently used
> > tracker inside the i915_vma itself and only fallback to the rbtree if
> > we need a second or more concurrent active trackers.
> > 
> > v2: Comments on how we overwrite any existing last_active cache.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
> >   drivers/gpu/drm/i915/i915_vma.h |  1 +
> >   2 files changed, 49 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index cd94ffc7f079..33925e00f7e8 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> >       __i915_vma_retire(active->vma, rq);
> >   }
> >   
> > +static void
> > +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> > +{
> > +     __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> > +}
> > +
> >   static struct i915_vma *
> >   vma_create(struct drm_i915_gem_object *obj,
> >          struct i915_address_space *vm,
> > @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
> >   
> >       vma->active = RB_ROOT;
> >   
> > +     init_request_active(&vma->last_active, i915_vma_last_retire);
> >       init_request_active(&vma->last_fence, NULL);
> >       vma->vm = vm;
> >       vma->ops = &vm->vma_ops;
> > @@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> >   {
> >       struct i915_vma_active *active;
> >       struct rb_node **p, *parent;
> > +     struct i915_request *old;
> > +
> > +     /*
> > +      * We track the most recently used timeline to skip a rbtree search
> > +      * for the common case, under typical loads we never need the rbtree
> > +      * at all. We can reuse the last_active slot if it is empty, that is
> > +      * after the previous activity has been retired, or if the active
> > +      * matches the current timeline.
> > +      */
> > +     old = i915_gem_active_raw(&vma->last_active,
> > +                               &vma->vm->i915->drm.struct_mutex);
> > +     if (!old || old->fence.context == idx)
> > +             goto out;
> 
> Is the situation that retire can be out of order relative to 
> move_to_active? In other words, last_active can retire before the rbtree 
> record, and so the following new move_to_active will find last_active 
> empty and so could create a double entry for the same timeline?

We don't mind a double entry, and do expect that last_active and the
rbtree entry will still be active, tracking different requests.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-05 12:02       ` Chris Wilson
@ 2018-07-05 12:29         ` Tvrtko Ursulin
  2018-07-05 12:48           ` Chris Wilson
  0 siblings, 1 reply; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-05 12:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 05/07/2018 13:02, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-07-05 12:38:46)
>>
>> On 04/07/2018 09:34, Chris Wilson wrote:
>>> Using a VMA on more than one timeline concurrently is the exception
>>> rather than the rule (using it concurrently on multiple engines). As we
>>> expect to only use one active tracker, store the most recently used
>>> tracker inside the i915_vma itself and only fallback to the rbtree if
>>> we need a second or more concurrent active trackers.
>>>
>>> v2: Comments on how we overwrite any existing last_active cache.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
>>>    drivers/gpu/drm/i915/i915_vma.h |  1 +
>>>    2 files changed, 49 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>> index cd94ffc7f079..33925e00f7e8 100644
>>> --- a/drivers/gpu/drm/i915/i915_vma.c
>>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>>> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>>>        __i915_vma_retire(active->vma, rq);
>>>    }
>>>    
>>> +static void
>>> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
>>> +{
>>> +     __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
>>> +}
>>> +
>>>    static struct i915_vma *
>>>    vma_create(struct drm_i915_gem_object *obj,
>>>           struct i915_address_space *vm,
>>> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>>>    
>>>        vma->active = RB_ROOT;
>>>    
>>> +     init_request_active(&vma->last_active, i915_vma_last_retire);
>>>        init_request_active(&vma->last_fence, NULL);
>>>        vma->vm = vm;
>>>        vma->ops = &vm->vma_ops;
>>> @@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
>>>    {
>>>        struct i915_vma_active *active;
>>>        struct rb_node **p, *parent;
>>> +     struct i915_request *old;
>>> +
>>> +     /*
>>> +      * We track the most recently used timeline to skip a rbtree search
>>> +      * for the common case, under typical loads we never need the rbtree
>>> +      * at all. We can reuse the last_active slot if it is empty, that is
>>> +      * after the previous activity has been retired, or if the active
>>> +      * matches the current timeline.
>>> +      */
>>> +     old = i915_gem_active_raw(&vma->last_active,
>>> +                               &vma->vm->i915->drm.struct_mutex);
>>> +     if (!old || old->fence.context == idx)
>>> +             goto out;
>>
>> Is the situation that retire can be out of order relative to
>> move_to_active? In other words, last_active can retire before the rbtree
>> record, and so the following new move_to_active will find last_active
>> empty and so could create a double entry for the same timeline?
> 
> We don't mind a double entry, and do expect that last_active and the
> rbtree entry will still be active, tracking different requests.

Maybe I mind double entries, if avoiding them would make the code easier 
to understand. :) Or not that we don't mind, but need them? Different 
requests you say, but on same timeline or?

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2] drm/i915: Track the last-active inside the i915_vma
  2018-07-05 12:29         ` Tvrtko Ursulin
@ 2018-07-05 12:48           ` Chris Wilson
  0 siblings, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2018-07-05 12:48 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-07-05 13:29:44)
> 
> On 05/07/2018 13:02, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-07-05 12:38:46)
> >>
> >> On 04/07/2018 09:34, Chris Wilson wrote:
> >>> Using a VMA on more than one timeline concurrently is the exception
> >>> rather than the rule (using it concurrently on multiple engines). As we
> >>> expect to only use one active tracker, store the most recently used
> >>> tracker inside the i915_vma itself and only fallback to the rbtree if
> >>> we need a second or more concurrent active trackers.
> >>>
> >>> v2: Comments on how we overwrite any existing last_active cache.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_vma.c | 50 +++++++++++++++++++++++++++++++--
> >>>    drivers/gpu/drm/i915/i915_vma.h |  1 +
> >>>    2 files changed, 49 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> >>> index cd94ffc7f079..33925e00f7e8 100644
> >>> --- a/drivers/gpu/drm/i915/i915_vma.c
> >>> +++ b/drivers/gpu/drm/i915/i915_vma.c
> >>> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
> >>>        __i915_vma_retire(active->vma, rq);
> >>>    }
> >>>    
> >>> +static void
> >>> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> >>> +{
> >>> +     __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> >>> +}
> >>> +
> >>>    static struct i915_vma *
> >>>    vma_create(struct drm_i915_gem_object *obj,
> >>>           struct i915_address_space *vm,
> >>> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
> >>>    
> >>>        vma->active = RB_ROOT;
> >>>    
> >>> +     init_request_active(&vma->last_active, i915_vma_last_retire);
> >>>        init_request_active(&vma->last_fence, NULL);
> >>>        vma->vm = vm;
> >>>        vma->ops = &vm->vma_ops;
> >>> @@ -895,6 +902,22 @@ static struct i915_gem_active *lookup_active(struct i915_vma *vma, u64 idx)
> >>>    {
> >>>        struct i915_vma_active *active;
> >>>        struct rb_node **p, *parent;
> >>> +     struct i915_request *old;
> >>> +
> >>> +     /*
> >>> +      * We track the most recently used timeline to skip a rbtree search
> >>> +      * for the common case, under typical loads we never need the rbtree
> >>> +      * at all. We can reuse the last_active slot if it is empty, that is
> >>> +      * after the previous activity has been retired, or if the active
> >>> +      * matches the current timeline.
> >>> +      */
> >>> +     old = i915_gem_active_raw(&vma->last_active,
> >>> +                               &vma->vm->i915->drm.struct_mutex);
> >>> +     if (!old || old->fence.context == idx)
> >>> +             goto out;
> >>
> >> Is the situation that retire can be out of order relative to
> >> move_to_active? In other words, last_active can retire before the rbtree
> >> record, and so the following new move_to_active will find last_active
> >> empty and so could create a double entry for the same timeline?
> > 
> > We don't mind a double entry, and do expect that last_active and the
> > rbtree entry will still be active, tracking different requests.
> 
> Maybe I mind double entries, if avoiding them would make the code easier 
> to understand. :) Or not that we don't mind, but need them? Different 
> requests you say, but on same timeline or?

Same timeline, we are indexing by timeline after all. Having the double
entry avoids having to search the rbtree for every new request, and even
the radixtree lookup was a noticeable wart in the profiles.

Ok, not every new request, only on changing, do you then need to lookup
the new request to promote it to cached, but still need to insert the old
cached request. That's more code just to avoid the isset() replacement.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma
  2018-07-06 10:39 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
@ 2018-07-06 11:07   ` Tvrtko Ursulin
  0 siblings, 0 replies; 40+ messages in thread
From: Tvrtko Ursulin @ 2018-07-06 11:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/07/2018 11:39, Chris Wilson wrote:
> Using a VMA on more than one timeline concurrently is the exception
> rather than the rule (using it concurrently on multiple engines). As we
> expect to only use one active tracker, store the most recently used
> tracker inside the i915_vma itself and only fallback to the rbtree if
> we need a second or more concurrent active trackers.
> 
> v2: Comments on how we overwrite any existing last_active cache.
> v3: __list_del_entry() before list_replace_init() is confusing and, much
> more important, entirely redundant.
> v4: Note that both last_active and the rbtree may be simultaneously
> tracking this timeline, albeit with different requests, and so the vma
> may be retired twice for the same timeline.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_vma.c | 56 +++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_vma.h |  1 +
>   2 files changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index b4cc98330225..6fbd09d6af28 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
>   	__i915_vma_retire(active->vma, rq);
>   }
>   
> +static void
> +i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
> +{
> +	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
> +}
> +
>   static struct i915_vma *
>   vma_create(struct drm_i915_gem_object *obj,
>   	   struct i915_address_space *vm,
> @@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
>   
>   	vma->active = RB_ROOT;
>   
> +	init_request_active(&vma->last_active, i915_vma_last_retire);
>   	init_request_active(&vma->last_fence, NULL);
>   	vma->vm = vm;
>   	vma->ops = &vm->vma_ops;
> @@ -895,6 +902,29 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
>   {
>   	struct i915_vma_active *active;
>   	struct rb_node **p, *parent;
> +	struct i915_request *old;
> +
> +	/*
> +	 * We track the most recently used timeline to skip a rbtree search
> +	 * for the common case, under typical loads we never need the rbtree
> +	 * at all. We can reuse the last_active slot if it is empty, that is
> +	 * after the previous activity has been retired, or if the active
> +	 * matches the current timeline.
> +	 *
> +	 * Note that we allow the timeline to be active simultaneously in
> +	 * the rbtree and the last_active cache. We do this to avoid having
> +	 * to search and replace the rbtree element for a new timeline, with
> +	 * the cost being that we must be aware that the vma may be retired
> +	 * twice for the same timeline (as the older rbtree element will be
> +	 * retired before the new request added to last_active).
> +	 */
> +	old = i915_gem_active_raw(&vma->last_active,
> +				  &vma->vm->i915->drm.struct_mutex);
> +	if (!old || old->fence.context == idx)
> +		goto out;
> +
> +	/* Move the currently active fence into the rbtree */
> +	idx = old->fence.context;
>   
>   	parent = NULL;
>   	p = &vma->active.rb_node;
> @@ -903,7 +933,7 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
>   
>   		active = rb_entry(parent, struct i915_vma_active, node);
>   		if (active->timeline == idx)
> -			return &active->base;
> +			goto replace;
>   
>   		if (active->timeline < idx)
>   			p = &parent->rb_right;
> @@ -922,7 +952,24 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
>   	rb_link_node(&active->node, parent, p);
>   	rb_insert_color(&active->node, &vma->active);
>   
> -	return &active->base;
> +replace:
> +	/*
> +	 * Overwrite the previous active slot in the rbtree with last_active,
> +	 * leaving last_active zeroed. If the previous slot is still active,
> +	 * we must be careful as we now only expect to receive one retire
> +	 * callback not two, and so must undo the active counting for the
> +	 * overwritten slot.
> +	 */
> +	if (i915_gem_active_isset(&active->base)) {
> +		vma->active_count--;
> +		GEM_BUG_ON(!vma->active_count);
> +	}
> +	GEM_BUG_ON(list_empty(&vma->last_active.link));
> +	list_replace_init(&vma->last_active.link, &active->base.link);
> +	active->base.request = fetch_and_zero(&vma->last_active.request);
> +
> +out:
> +	return &vma->last_active;
>   }
>   
>   int i915_vma_move_to_active(struct i915_vma *vma,
> @@ -1002,6 +1049,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		 */
>   		__i915_vma_pin(vma);
>   
> +		ret = i915_gem_active_retire(&vma->last_active,
> +					     &vma->vm->i915->drm.struct_mutex);
> +		if (ret)
> +			goto unpin;
> +
>   		rbtree_postorder_for_each_entry_safe(active, n,
>   						     &vma->active, node) {
>   			ret = i915_gem_active_retire(&active->base,
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index c297b0a0dc47..f06d66377107 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -97,6 +97,7 @@ struct i915_vma {
>   
>   	unsigned int active_count;
>   	struct rb_root active;
> +	struct i915_gem_active last_active;
>   	struct i915_gem_active last_fence;
>   
>   	/**
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma
  2018-07-06 10:39 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
@ 2018-07-06 10:39 ` Chris Wilson
  2018-07-06 11:07   ` Tvrtko Ursulin
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2018-07-06 10:39 UTC (permalink / raw)
  To: intel-gfx

Using a VMA on more than one timeline concurrently is the exception
rather than the rule (using it concurrently on multiple engines). As we
expect to only use one active tracker, store the most recently used
tracker inside the i915_vma itself and only fallback to the rbtree if
we need a second or more concurrent active trackers.

v2: Comments on how we overwrite any existing last_active cache.
v3: __list_del_entry() before list_replace_init() is confusing and, much
more important, entirely redundant.
v4: Note that both last_active and the rbtree may be simultaneously
tracking this timeline, albeit with different requests, and so the vma
may be retired twice for the same timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_vma.c | 56 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma.h |  1 +
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index b4cc98330225..6fbd09d6af28 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,6 +119,12 @@ i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
 	__i915_vma_retire(active->vma, rq);
 }
 
+static void
+i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+	__i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
+}
+
 static struct i915_vma *
 vma_create(struct drm_i915_gem_object *obj,
 	   struct i915_address_space *vm,
@@ -136,6 +142,7 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	vma->active = RB_ROOT;
 
+	init_request_active(&vma->last_active, i915_vma_last_retire);
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
@@ -895,6 +902,29 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
 {
 	struct i915_vma_active *active;
 	struct rb_node **p, *parent;
+	struct i915_request *old;
+
+	/*
+	 * We track the most recently used timeline to skip a rbtree search
+	 * for the common case, under typical loads we never need the rbtree
+	 * at all. We can reuse the last_active slot if it is empty, that is
+	 * after the previous activity has been retired, or if the active
+	 * matches the current timeline.
+	 *
+	 * Note that we allow the timeline to be active simultaneously in
+	 * the rbtree and the last_active cache. We do this to avoid having
+	 * to search and replace the rbtree element for a new timeline, with
+	 * the cost being that we must be aware that the vma may be retired
+	 * twice for the same timeline (as the older rbtree element will be
+	 * retired before the new request added to last_active).
+	 */
+	old = i915_gem_active_raw(&vma->last_active,
+				  &vma->vm->i915->drm.struct_mutex);
+	if (!old || old->fence.context == idx)
+		goto out;
+
+	/* Move the currently active fence into the rbtree */
+	idx = old->fence.context;
 
 	parent = NULL;
 	p = &vma->active.rb_node;
@@ -903,7 +933,7 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
 
 		active = rb_entry(parent, struct i915_vma_active, node);
 		if (active->timeline == idx)
-			return &active->base;
+			goto replace;
 
 		if (active->timeline < idx)
 			p = &parent->rb_right;
@@ -922,7 +952,24 @@ static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
 	rb_link_node(&active->node, parent, p);
 	rb_insert_color(&active->node, &vma->active);
 
-	return &active->base;
+replace:
+	/*
+	 * Overwrite the previous active slot in the rbtree with last_active,
+	 * leaving last_active zeroed. If the previous slot is still active,
+	 * we must be careful as we now only expect to receive one retire
+	 * callback not two, and so must undo the active counting for the
+	 * overwritten slot.
+	 */
+	if (i915_gem_active_isset(&active->base)) {
+		vma->active_count--;
+		GEM_BUG_ON(!vma->active_count);
+	}
+	GEM_BUG_ON(list_empty(&vma->last_active.link));
+	list_replace_init(&vma->last_active.link, &active->base.link);
+	active->base.request = fetch_and_zero(&vma->last_active.request);
+
+out:
+	return &vma->last_active;
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -1002,6 +1049,11 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 */
 		__i915_vma_pin(vma);
 
+		ret = i915_gem_active_retire(&vma->last_active,
+					     &vma->vm->i915->drm.struct_mutex);
+		if (ret)
+			goto unpin;
+
 		rbtree_postorder_for_each_entry_safe(active, n,
 						     &vma->active, node) {
 			ret = i915_gem_active_retire(&active->base,
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index c297b0a0dc47..f06d66377107 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -97,6 +97,7 @@ struct i915_vma {
 
 	unsigned int active_count;
 	struct rb_root active;
+	struct i915_gem_active last_active;
 	struct i915_gem_active last_fence;
 
 	/**
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2018-07-06 11:08 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-29 22:54 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
2018-06-29 22:54 ` [PATCH 2/6] drm/i915: Export i915_request_skip() Chris Wilson
2018-07-02 11:37   ` Tvrtko Ursulin
2018-06-29 22:54 ` [PATCH 3/6] drm/i915: Start returning an error from i915_vma_move_to_active() Chris Wilson
2018-07-02 11:41   ` Tvrtko Ursulin
2018-06-29 22:54 ` [PATCH 4/6] drm/i915: Move i915_vma_move_to_active() to i915_vma.c Chris Wilson
2018-07-02 11:41   ` Tvrtko Ursulin
2018-06-29 22:54 ` [PATCH 5/6] drm/i915: Track vma activity per fence.context, not per engine Chris Wilson
2018-07-03 17:28   ` Tvrtko Ursulin
2018-07-03 20:29     ` Chris Wilson
2018-07-04  9:43       ` Tvrtko Ursulin
2018-07-04  9:53         ` Chris Wilson
2018-07-04  9:13   ` [PATCH v3] " Chris Wilson
2018-07-04 11:19     ` Tvrtko Ursulin
2018-06-29 22:54 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
2018-07-03 17:40   ` Tvrtko Ursulin
2018-07-04  8:34   ` [PATCH v2] " Chris Wilson
2018-07-04  9:39     ` Tvrtko Ursulin
2018-07-04 11:34       ` Tvrtko Ursulin
2018-07-04 11:47         ` Chris Wilson
2018-07-04 12:30           ` Tvrtko Ursulin
2018-07-05 11:38     ` Tvrtko Ursulin
2018-07-05 12:02       ` Chris Wilson
2018-07-05 12:29         ` Tvrtko Ursulin
2018-07-05 12:48           ` Chris Wilson
2018-06-29 23:04 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Patchwork
2018-06-29 23:19 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-30  3:03 ` ✓ Fi.CI.IGT: " Patchwork
2018-07-02 11:34 ` [PATCH 1/6] " Tvrtko Ursulin
2018-07-02 11:44   ` Chris Wilson
2018-07-02 12:29     ` Tvrtko Ursulin
2018-07-04  8:52 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev2) Patchwork
2018-07-04  8:55 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-07-04  9:07 ` ✓ Fi.CI.BAT: success " Patchwork
2018-07-04  9:34 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() (rev3) Patchwork
2018-07-04  9:37 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-07-04  9:49 ` ✓ Fi.CI.BAT: success " Patchwork
2018-07-04 10:43 ` ✓ Fi.CI.IGT: " Patchwork
2018-07-06 10:39 [PATCH 1/6] drm/i915: Refactor export_fence() after i915_vma_move_to_active() Chris Wilson
2018-07-06 10:39 ` [PATCH 6/6] drm/i915: Track the last-active inside the i915_vma Chris Wilson
2018-07-06 11:07   ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.