* Prep work for preemption and GEM bugfixes
From: Chris Wilson @ 2017-02-14  9:53 UTC
  To: intel-gfx

Tidy up the global seqno in preparation for requests being cancelled
and their seqno returned to the timeline - even if they are currently
being waited upon by third parties. In the process, one annoying
sporadic GEM_DEBUG failure is fixed, and we are able to take another
pass through i915_wait_request() to make its loops more uniform.
-Chris


* [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

Replace the global device seqno with one for each engine, and account
for in-flight seqnos on each separately. This is consistent with
dma-fence, as each timeline has separate fence-contexts for each engine
and a seqno is only ordered within a fence-context (i.e. seqnos do not
need to be ordered with respect to other engines, just within a single
engine). This is required to enable request rewinding for preemption on
individual engines (we have to rewind the seqno to avoid overflow, and
with per-engine seqnos we do not have to rewind every engine just to
preempt on one).
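
A minimal sketch of the wraparound check this relies on (illustrative
only; can_reserve_seqno() is a made-up name, but the u32 arithmetic
matches reserve_global_seqno() in the patch below):

static bool can_reserve_seqno(u32 seqno, u32 active)
{
        /*
         * u32 addition wraps modulo 2^32, so with active >= 1 the sum
         * falls back below seqno exactly when handing out 'active' more
         * seqnos would cross the wrap point; reserve_global_seqno()
         * then resets the timeline to 0 before continuing.
         */
        return seqno + active > seqno;
}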

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
 drivers/gpu/drm/i915/i915_gem_request.c  | 68 +++++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_request.h  |  8 +---
 drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 +++++++---------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
 7 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cda957c674ee..9b636962cab6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1080,10 +1080,7 @@ static const struct file_operations i915_error_state_fops = {
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
-	struct drm_i915_private *dev_priv = data;
-
-	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
-	return 0;
+	return -ENODEV;
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 24571a5289a4..2d84da0e2b39 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -213,7 +213,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
+
 	GEM_BUG_ON(!request->i915->gt.active_requests);
+	GEM_BUG_ON(!request->engine->timeline->active_seqno);
 
 	trace_i915_gem_request_retire(request);
 
@@ -237,6 +239,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 				 &request->i915->gt.idle_work,
 				 msecs_to_jiffies(100));
 	}
+	request->engine->timeline->active_seqno--;
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -325,15 +328,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 	GEM_BUG_ON(i915->gt.active_requests > 1);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
-		while (intel_breadcrumbs_busy(i915))
-			cond_resched(); /* spin until threads are complete */
-	}
-	atomic_set(&timeline->seqno, seqno);
+	for_each_engine(engine, i915, id) {
+		struct intel_timeline *tl = &timeline->engine[id];
 
-	/* Finally reset hw state */
-	for_each_engine(engine, i915, id)
+		if (!i915_seqno_passed(seqno, tl->seqno)) {
+			/* spin until threads are complete */
+			while (intel_breadcrumbs_busy(engine))
+				cond_resched();
+		}
+
+		/* Finally reset hw state */
+		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
+	}
 
 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
 		for_each_engine(engine, i915, id) {
@@ -361,34 +368,28 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
 }
 
-static int reserve_global_seqno(struct drm_i915_private *i915)
+static int reserve_global_seqno(struct intel_engine_cs *engine)
 {
-	u32 active_requests = ++i915->gt.active_requests;
-	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
+	u32 active = ++engine->timeline->active_seqno;
+	u32 seqno = engine->timeline->seqno;
 	int ret;
 
 	/* Reservation is fine until we need to wrap around */
-	if (likely(seqno + active_requests > seqno))
+	if (likely(seqno + active > seqno))
 		return 0;
 
-	ret = i915_gem_init_global_seqno(i915, 0);
+	ret = i915_gem_init_global_seqno(engine->i915, 0);
 	if (ret) {
-		i915->gt.active_requests--;
+		engine->timeline->active_seqno--;
 		return ret;
 	}
 
 	return 0;
 }
 
-static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
+static u32 timeline_get_seqno(struct intel_timeline *tl)
 {
-	/* seqno only incremented under a mutex */
-	return ++tl->seqno.counter;
-}
-
-static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
-{
-	return atomic_inc_return(&tl->seqno);
+	return ++tl->seqno;
 }
 
 void __i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -402,14 +403,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(timeline == request->timeline);
 	assert_spin_locked(&timeline->lock);
 
-	seqno = timeline_get_seqno(timeline->common);
+	seqno = timeline_get_seqno(timeline);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
-	request->previous_seqno = timeline->last_submitted_seqno;
-	timeline->last_submitted_seqno = seqno;
-
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
@@ -514,7 +511,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = reserve_global_seqno(dev_priv);
+	ret = reserve_global_seqno(engine);
 	if (ret)
 		goto err_unpin;
 
@@ -566,7 +563,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       &i915_fence_ops,
 		       &req->lock,
 		       req->timeline->fence_context,
-		       __timeline_get_seqno(req->timeline->common));
+		       timeline_get_seqno(req->timeline));
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
@@ -611,6 +608,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	/* Check that we didn't interrupt ourselves with a new request */
+	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
 	return req;
 
 err_ctx:
@@ -621,7 +620,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	kmem_cache_free(dev_priv->requests, req);
 err_unreserve:
-	dev_priv->gt.active_requests--;
+	engine->timeline->active_seqno--;
 err_unpin:
 	engine->context_unpin(engine, ctx);
 	return ERR_PTR(ret);
@@ -833,8 +832,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * our i915_gem_request_alloc() and called __i915_add_request() before
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
+	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
@@ -889,16 +887,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
-
-	timeline->last_submitted_seqno = request->fence.seqno;
+	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	i915_gem_mark_busy(engine);
+	if (!request->i915->gt.active_requests++)
+		i915_gem_mark_busy(engine);
 
 	/* Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea511f06efaf..9049936c571c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -145,12 +145,6 @@ struct drm_i915_gem_request {
 
 	u32 global_seqno;
 
-	/** GEM sequence number associated with the previous request,
-	 * when the HWS breadcrumb is equal to this the GPU is processing
-	 * this request.
-	 */
-	u32 previous_seqno;
-
 	/** Position in the ring of the start of the request */
 	u32 head;
 
@@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
 	GEM_BUG_ON(!req->global_seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->previous_seqno);
+				 req->global_seqno - 1);
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index f2e51f42cc2f..bc101e644143 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -33,7 +33,8 @@ struct i915_gem_timeline;
 
 struct intel_timeline {
 	u64 fence_context;
-	u32 last_submitted_seqno;
+	u32 seqno;
+	u32 active_seqno;
 
 	spinlock_t lock;
 
@@ -56,7 +57,6 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
 	struct list_head link;
-	atomic_t seqno;
 
 	struct drm_i915_private *i915;
 	const char *name;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 74cb7b91b5db..4f859e423ef3 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -655,31 +655,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	cancel_fake_irq(engine);
 }
 
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	unsigned int mask = 0;
-
-	for_each_engine(engine, i915, id) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		spin_lock_irq(&b->lock);
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	bool busy = false;
 
-		if (b->first_wait) {
-			wake_up_process(b->first_wait->tsk);
-			mask |= intel_engine_flag(engine);
-		}
+	spin_lock_irq(&b->lock);
 
-		if (b->first_signal) {
-			wake_up_process(b->signaler);
-			mask |= intel_engine_flag(engine);
-		}
+	if (b->first_wait) {
+		wake_up_process(b->first_wait->tsk);
+		busy |= intel_engine_flag(engine);
+	}
 
-		spin_unlock_irq(&b->lock);
+	if (b->first_signal) {
+		wake_up_process(b->signaler);
+		busy |= intel_engine_flag(engine);
 	}
 
-	return mask;
+	spin_unlock_irq(&b->lock);
+
+	return busy;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 538d845d7251..1798b87fd934 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -254,8 +254,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		engine->irq_seqno_barrier(engine);
 
 	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-	engine->timeline->last_submitted_seqno = seqno;
-
 	engine->hangcheck.seqno = seqno;
 
 	/* After manually advancing the seqno, fake the interrupt in case
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cc62e89010d3..9412c8f58619 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -564,7 +564,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
 	 * wtih serialising this hint with anything, so document it as
 	 * a hint and nothing more.
 	 */
-	return READ_ONCE(engine->timeline->last_submitted_seqno);
+	return READ_ONCE(engine->timeline->seqno);
 }
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
@@ -637,6 +637,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0


* [PATCH v2 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

Use a local variable to avoid having to type out the full name of the
gpu_error wait_queue.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 2d84da0e2b39..f7ff3736797f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1061,6 +1061,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 
@@ -1096,7 +1097,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	set_current_state(state);
 	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -1147,8 +1148,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
 			i915_reset(req->i915);
-			reset_wait_queue(&req->i915->gpu_error.wait_queue,
-					 &reset);
+			reset_wait_queue(errq, &reset);
 			continue;
 		}
 
@@ -1159,7 +1159,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	intel_engine_remove_wait(req->engine, &wait);
 	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+		remove_wait_queue(errq, &reset);
 	__set_current_state(TASK_RUNNING);
 
 complete:
-- 
2.11.0


* [PATCH v2 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

Add ourselves to the gpu error waitqueue earlier, even before we
determine that we have to wait on the seqno. This is so that we can
share the waitqueue between stages in subsequent patches.
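
The resulting shape of i915_wait_request() (a simplified sketch with
the two wait loops elided to comments; errq and reset are the locals
introduced by the previous patch):

        if (flags & I915_WAIT_LOCKED)
                add_wait_queue(errq, &reset);

        /* stage 1: wait for the request to be submitted to hardware */
        /* stage 2: wait for the breadcrumb to signal completion */

complete:
        if (flags & I915_WAIT_LOCKED)
                remove_wait_queue(errq, &reset);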

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f7ff3736797f..dcc0a7ab95dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1081,6 +1081,9 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req);
 
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(errq, &reset);
+
 	if (!i915_sw_fence_done(&req->execute)) {
 		timeout = __i915_request_wait_for_execute(req, flags, timeout);
 		if (timeout < 0)
@@ -1096,9 +1099,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(errq, &reset);
-
 	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
@@ -1158,11 +1158,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(errq, &reset);
 	__set_current_state(TASK_RUNNING);
 
 complete:
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(errq, &reset);
 	trace_i915_gem_request_wait_end(req);
 
 	return timeout;
-- 
2.11.0


* [PATCH v2 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute()
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

It had only one callsite and existed only to keep the code clearer. Now
that the wait-on-error is shared between phases, and with plans to
change the wait-for-execute in the next few patches, remove the
out-of-line wait loop and move it into the main body of
i915_wait_request().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 78 ++++++++++++---------------------
 1 file changed, 29 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index dcc0a7ab95dc..001fc9fedf49 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -988,54 +988,6 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
-static long
-__i915_request_wait_for_execute(struct drm_i915_gem_request *request,
-				unsigned int flags,
-				long timeout)
-{
-	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
-		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
-	DEFINE_WAIT(reset);
-	DEFINE_WAIT(wait);
-
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(q, &reset);
-
-	do {
-		prepare_to_wait(&request->execute.wait, &wait, state);
-
-		if (i915_sw_fence_done(&request->execute))
-			break;
-
-		if (flags & I915_WAIT_LOCKED &&
-		    i915_reset_in_progress(&request->i915->gpu_error)) {
-			__set_current_state(TASK_RUNNING);
-			i915_reset(request->i915);
-			reset_wait_queue(q, &reset);
-			continue;
-		}
-
-		if (signal_pending_state(state, current)) {
-			timeout = -ERESTARTSYS;
-			break;
-		}
-
-		if (!timeout) {
-			timeout = -ETIME;
-			break;
-		}
-
-		timeout = io_schedule_timeout(timeout);
-	} while (1);
-	finish_wait(&request->execute.wait, &wait);
-
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(q, &reset);
-
-	return timeout;
-}
-
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -1085,7 +1037,35 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		add_wait_queue(errq, &reset);
 
 	if (!i915_sw_fence_done(&req->execute)) {
-		timeout = __i915_request_wait_for_execute(req, flags, timeout);
+		DEFINE_WAIT(exec);
+
+		do {
+			prepare_to_wait(&req->execute.wait, &exec, state);
+			if (i915_sw_fence_done(&req->execute))
+				break;
+
+			if (flags & I915_WAIT_LOCKED &&
+			    i915_reset_in_progress(&req->i915->gpu_error)) {
+				__set_current_state(TASK_RUNNING);
+				i915_reset(req->i915);
+				reset_wait_queue(errq, &reset);
+				continue;
+			}
+
+			if (signal_pending_state(state, current)) {
+				timeout = -ERESTARTSYS;
+				break;
+			}
+
+			if (!timeout) {
+				timeout = -ETIME;
+				break;
+			}
+
+			timeout = io_schedule_timeout(timeout);
+		} while (1);
+		finish_wait(&req->execute.wait, &exec);
+
 		if (timeout < 0)
 			goto complete;
 
-- 
2.11.0


* [PATCH v2 05/14] drm/i915: Deconstruct execute fence
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

On reflection, we are only using the execute fence as a waitqueue on
the global_seqno and not for dependency tracking between fences (unlike
the submit and dma fences). By treating it purely as a waitqueue, we
can then handle it similarly to the other waitqueues during submit,
making the code simpler.
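
The change in mechanism, in miniature (a sketch of the two hunks below,
not a complete picture):

        /* before: execution was broadcast through a second sw fence */
        i915_sw_fence_commit(&request->execute);

        /* after: the request carries a bare waitqueue instead */
        wake_up_all(&request->execute);

with struct drm_i915_gem_request correspondingly swapping its struct
i915_sw_fence execute (plus the execq wait_queue_t) for a plain
wait_queue_head_t.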

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 47 +++++++--------------------------
 drivers/gpu/drm/i915/i915_gem_request.h | 10 +------
 2 files changed, 11 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 001fc9fedf49..bb59acaa8a34 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence)
 	 * caught trying to reuse dead objects.
 	 */
 	i915_sw_fence_fini(&req->submit);
-	i915_sw_fence_fini(&req->execute);
 
 	kmem_cache_free(req->i915->requests, req);
 }
@@ -211,7 +210,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 
 	GEM_BUG_ON(!request->i915->gt.active_requests);
@@ -422,7 +420,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	list_move_tail(&request->link, &timeline->requests);
 	spin_unlock(&request->timeline->lock);
 
-	i915_sw_fence_commit(&request->execute);
+	wake_up_all(&request->execute);
 }
 
 void i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -457,24 +455,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static int __i915_sw_fence_call
-execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
-	struct drm_i915_gem_request *request =
-		container_of(fence, typeof(*request), execute);
-
-	switch (state) {
-	case FENCE_COMPLETE:
-		break;
-
-	case FENCE_FREE:
-		i915_gem_request_put(request);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -567,13 +547,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
-	i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);
-
-	/* Ensure that the execute fence completes after the submit fence -
-	 * as we complete the execute fence from within the submit fence
-	 * callback, its completion would otherwise be visible first.
-	 */
-	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
+	init_waitqueue_head(&req->execute);
 
 	i915_priotree_init(&req->priotree);
 
@@ -1015,6 +989,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
 	DEFINE_WAIT(reset);
+	DEFINE_WAIT(exec);
 	struct intel_wait wait;
 
 	might_sleep();
@@ -1036,12 +1011,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	if (!i915_sw_fence_done(&req->execute)) {
-		DEFINE_WAIT(exec);
-
+	reset_wait_queue(&req->execute, &exec);
+	if (!req->global_seqno) {
 		do {
-			prepare_to_wait(&req->execute.wait, &exec, state);
-			if (i915_sw_fence_done(&req->execute))
+			set_current_state(state);
+			if (req->global_seqno)
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1064,15 +1038,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 			timeout = io_schedule_timeout(timeout);
 		} while (1);
-		finish_wait(&req->execute.wait, &exec);
+		finish_wait(&req->execute, &exec);
 
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
+		GEM_BUG_ON(!req->global_seqno);
 	}
-	GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
-	GEM_BUG_ON(!req->global_seqno);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 9049936c571c..467d3e13fce0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -119,18 +119,10 @@ struct drm_i915_gem_request {
 	 * The submit fence is used to await upon all of the request's
 	 * dependencies. When it is signaled, the request is ready to run.
 	 * It is used by the driver to then queue the request for execution.
-	 *
-	 * The execute fence is used to signal when the request has been
-	 * sent to hardware.
-	 *
-	 * It is illegal for the submit fence of one request to wait upon the
-	 * execute fence of an earlier request. It should be sufficient to
-	 * wait upon the submit fence of the earlier request.
 	 */
 	struct i915_sw_fence submit;
-	struct i915_sw_fence execute;
 	wait_queue_t submitq;
-	wait_queue_t execq;
+	wait_queue_head_t execute;
 
 	/* A list of everyone we wait upon, and everyone who waits upon us.
 	 * Even though we will not be submitted to the hardware before the
-- 
2.11.0


* [PATCH v2 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

A request is assigned a global seqno only when it is on the hardware
execution queue. The global seqno can be used to maintain a list of
requests on the same engine in retirement order, for example for
constructing a priority queue for waiting. Prior to its execution, or
if it is subsequently removed in the event of preemption, its global
seqno is zero. As both insertion into and removal from the execution
queue may operate in IRQ context, it is not guarded by the usual
struct_mutex BKL. Instead, those relying on the global seqno must be
prepared for its value to change between reads. Only when the request
is complete can the global seqno be stable (due to the memory barriers
on submitting the commands to the hardware to write the breadcrumb: if
the HWS shows that it has passed the global seqno, and the global seqno
is unchanged after the read, the request is indeed complete).
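
Written out, that check is the pairing of i915_gem_request_global_seqno()
with __i915_gem_request_completed() from this patch, condensed here into
one sketch (request_completed() is a stand-in name):

static bool request_completed(const struct drm_i915_gem_request *req)
{
        u32 seqno = READ_ONCE(req->global_seqno);

        if (!seqno) /* not (or no longer) on the hw execution queue */
                return false;

        /* Complete only if the HWS has passed the sampled seqno and the
         * request was not unsubmitted/resubmitted while we looked.
         */
        return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
               seqno == READ_ONCE(req->global_seqno);
}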

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 16 ++++++--
 drivers/gpu/drm/i915/i915_gem.c          | 16 +++++---
 drivers/gpu/drm/i915/i915_gem_request.c  | 46 ++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_request.h  | 66 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
 5 files changed, 114 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 251b2d66407e..cb66fc33cab6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4005,14 +4005,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 }
 
 static inline bool
-__i915_request_irq_complete(struct drm_i915_gem_request *req)
+__i915_request_irq_complete(const struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
+	u32 seqno = i915_gem_request_global_seqno(req);
+
+	/* The request was dequeued before we were awoken. We check after
+	 * inspecting the hw to confirm that this was the same request
+	 * that generated the HWS update. The memory barriers within
+	 * the request execution are sufficient to ensure that a check
+	 * after reading the value from hw matches this request.
+	 */
+	if (!seqno)
+		return false;
 
 	/* Before we do the heavier coherent read of the seqno,
 	 * check the value (hopefully) in the CPU cacheline.
 	 */
-	if (__i915_gem_request_completed(req))
+	if (__i915_gem_request_completed(req, seqno))
 		return true;
 
 	/* Ensure our read of the seqno is coherent so that we
@@ -4063,7 +4073,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			wake_up_process(tsk);
 		rcu_read_unlock();
 
-		if (__i915_gem_request_completed(req))
+		if (__i915_gem_request_completed(req, seqno))
 			return true;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 766820ae9985..a7eea5ff44ca 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -400,7 +400,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 		i915_gem_request_retire_upto(rq);
 
-	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -2612,7 +2612,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *request;
+	struct drm_i915_gem_request *request, *active = NULL;
+	unsigned long flags;
 
 	/* We are called by the error capture and reset at a random
 	 * point in time. In particular, note that neither is crucially
@@ -2622,17 +2623,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * extra delay for a recent interrupt is pointless. Hence, we do
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
 	list_for_each_entry(request, &engine->timeline->requests, link) {
-		if (__i915_gem_request_completed(request))
+		if (__i915_gem_request_completed(request,
+						 request->global_seqno))
 			continue;
 
 		GEM_BUG_ON(request->engine != engine);
 		GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				    &request->fence.flags));
-		return request;
+
+		active = request;
+		break;
 	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
-	return NULL;
+	return active;
 }
 
 static bool engine_stalled(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bb59acaa8a34..5db4fd1eabcd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -412,7 +412,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 		intel_engine_enable_signaling(request);
 	spin_unlock(&request->lock);
 
-	GEM_BUG_ON(!request->global_seqno);
 	engine->emit_breadcrumb(request,
 				request->ring->vaddr + request->postfix);
 
@@ -498,7 +497,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
-	if (req && __i915_gem_request_completed(req))
+	if (req && i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
 	/* Beware: Dragons be flying overhead.
@@ -604,6 +603,7 @@ static int
 i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			       struct drm_i915_gem_request *from)
 {
+	u32 seqno;
 	int ret;
 
 	GEM_BUG_ON(to == from);
@@ -626,14 +626,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 		return ret < 0 ? ret : 0;
 	}
 
-	if (!from->global_seqno) {
+	seqno = i915_gem_request_global_seqno(from);
+	if (!seqno) {
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
 						    GFP_KERNEL);
 		return ret < 0 ? ret : 0;
 	}
 
-	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
+	if (seqno <= to->timeline->sync_seqno[from->engine->id])
 		return 0;
 
 	trace_i915_gem_ring_sync_to(to, from);
@@ -651,7 +652,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}
 
-	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
+	to->timeline->sync_seqno[from->engine->id] = seqno;
 	return 0;
 }
 
@@ -931,7 +932,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *req,
-			 int state, unsigned long timeout_us)
+			 u32 seqno, int state, unsigned long timeout_us)
 {
 	unsigned int cpu;
 
@@ -947,7 +948,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 
 	timeout_us += local_clock_us(&cpu);
 	do {
-		if (__i915_gem_request_completed(req))
+		if (seqno != i915_gem_request_global_seqno(req))
+			break;
+
+		if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
+				      seqno))
 			return true;
 
 		if (signal_pending_state(state, current))
@@ -1011,11 +1016,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
+	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+
 	reset_wait_queue(&req->execute, &exec);
-	if (!req->global_seqno) {
+	if (!wait.seqno) {
 		do {
 			set_current_state(state);
-			if (req->global_seqno)
+
+			wait.seqno = i915_gem_request_global_seqno(req);
+			if (wait.seqno)
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1043,7 +1052,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!req->global_seqno);
+		GEM_BUG_ON(!wait.seqno);
 	}
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
@@ -1052,7 +1061,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
@@ -1073,7 +1081,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 		timeout = io_schedule_timeout(timeout);
 
-		if (intel_wait_complete(&wait))
+		if (intel_wait_complete(&wait) &&
+		    i915_gem_request_global_seqno(req) == wait.seqno)
 			break;
 
 		set_current_state(state);
@@ -1124,14 +1133,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 static void engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
+	u32 seqno = intel_engine_get_seqno(engine);
+	LIST_HEAD(retire);
 
+	spin_lock_irq(&engine->timeline->lock);
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
-		if (!__i915_gem_request_completed(request))
-			return;
+		if (!i915_seqno_passed(seqno, request->global_seqno))
+			break;
 
-		i915_gem_request_retire(request);
+		list_move_tail(&request->link, &retire);
 	}
+	spin_unlock_irq(&engine->timeline->lock);
+
+	list_for_each_entry_safe(request, next, &retire, link)
+		i915_gem_request_retire(request);
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 467d3e13fce0..b81f6709905c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -135,6 +135,11 @@ struct drm_i915_gem_request {
 	struct i915_priotree priotree;
 	struct i915_dependency dep;
 
+	/** GEM sequence number associated with this request on the
+	 * global execution timeline. It is zero when the request is not
+	 * on the HW queue (i.e. not on the engine timeline list).
+	 * Its value is guarded by the timeline spinlock.
+	 */
 	u32 global_seqno;
 
 	/** Position in the ring of the start of the request */
@@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+/**
+ * i915_gem_request_global_seqno - report the current global seqno
+ * @request - the request
+ *
+ * A request is assigned a global seqno only when it is on the hardware
+ * execution queue. The global seqno can be used to maintain a list of
+ * requests on the same engine in retirement order, for example for
+ * constructing a priority queue for waiting. Prior to its execution, or
+ * if it is subsequently removed in the event of preemption, its global
+ * seqno is zero. As both insertion and removal from the execution queue
+ * may operate in IRQ context, it is not guarded by the usual struct_mutex
+ * BKL. Instead those relying on the global seqno must be prepared for its
+ * value to change between reads. Only when the request is complete can
+ * the global seqno be stable (due to the memory barriers on submitting
+ * the commands to the hardware to write the breadcrumb, if the HWS shows
+ * that it has passed the global seqno and the global seqno is unchanged
+ * after the read, it is indeed complete).
+ */
+static u32
+i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
+{
+	return READ_ONCE(request->global_seqno);
+}
+
 int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
@@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 }
 
 static inline bool
-__i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
 {
-	GEM_BUG_ON(!req->global_seqno);
+	GEM_BUG_ON(!seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->global_seqno - 1);
+				 seqno - 1);
 }
 
 static inline bool
 i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
-	if (!req->global_seqno)
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(req);
+	if (!seqno)
 		return false;
 
-	return __i915_gem_request_started(req);
+	return __i915_gem_request_started(req, seqno);
 }
 
 static inline bool
-__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
 {
-	GEM_BUG_ON(!req->global_seqno);
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->global_seqno);
+	GEM_BUG_ON(!seqno);
+	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
+		seqno == i915_gem_request_global_seqno(req);
 }
 
 static inline bool
 i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
-	if (!req->global_seqno)
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(req);
+	if (!seqno)
 		return false;
 
-	return __i915_gem_request_completed(req);
+	return __i915_gem_request_completed(req, seqno);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
-			 int state, unsigned long timeout_us);
+			 u32 seqno, int state, unsigned long timeout_us);
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
 				     int state, unsigned long timeout_us)
 {
-	return (__i915_gem_request_started(request) &&
-		__i915_spin_request(request, state, timeout_us));
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(request);
+	return (__i915_gem_request_started(request, seqno) &&
+		__i915_spin_request(request, seqno, state, timeout_us));
 }
 
 /* We treat requests as fences. This is not be to confused with our
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 4f859e423ef3..1af7b4814bfb 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -523,6 +523,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	struct rb_node *parent, **p;
 	bool first, wakeup;
+	u32 seqno;
 
 	/* Note that we may be called from an interrupt handler on another
 	 * device (e.g. nouveau signaling a fence completion causing us
@@ -533,11 +534,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 
 	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
 	assert_spin_locked(&request->lock);
-	if (!request->global_seqno)
+
+	seqno = i915_gem_request_global_seqno(request);
+	if (!seqno)
 		return;
 
 	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.seqno = request->global_seqno;
+	request->signaling.wait.seqno = seqno;
 	i915_gem_request_get(request);
 
 	spin_lock(&b->lock);
@@ -561,8 +564,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	p = &b->signals.rb_node;
 	while (*p) {
 		parent = *p;
-		if (i915_seqno_passed(request->global_seqno,
-				      to_signaler(parent)->global_seqno)) {
+		if (i915_seqno_passed(seqno,
+				      to_signaler(parent)->signaling.wait.seqno)) {
 			p = &parent->rb_right;
 			first = false;
 		} else {
-- 
2.11.0


* [PATCH v2 07/14] drm/i915: Take a reference whilst processing the signaler request
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

The plan in the near future is to allow requests to be removed from the
signaler. We can then no longer rely on holding a reference to the
request for the duration it is in the signaling tree, and must instead
obtain a reference to the request for the current operation using RCU.
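
The acquisition pattern in isolation (lifted from the signaler hunk
below):

        rcu_read_lock();
        request = rcu_dereference(b->first_signal);
        if (request)
                request = i915_gem_request_get_rcu(request);
        rcu_read_unlock();

The rcu_read_lock() only guarantees the request is not freed while we
peek at it; i915_gem_request_get_rcu() converts that into a full
reference we may keep after the read-side section ends.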

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 24 ++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 1af7b4814bfb..1445a8ff2e02 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -474,7 +474,11 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * need to wait for a new interrupt from the GPU or for
 		 * a new client.
 		 */
-		request = READ_ONCE(b->first_signal);
+		rcu_read_lock();
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+		rcu_read_unlock();
 		if (signal_complete(request)) {
 			local_bh_disable();
 			dma_fence_signal(&request->fence);
@@ -493,24 +497,28 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 * the oldest before picking the next one.
 			 */
 			spin_lock_irq(&b->lock);
-			if (request == b->first_signal) {
+			if (request == rcu_access_pointer(b->first_signal)) {
 				struct rb_node *rb =
 					rb_next(&request->signaling.node);
-				b->first_signal = rb ? to_signaler(rb) : NULL;
+				rcu_assign_pointer(b->first_signal,
+						   rb ? to_signaler(rb) : NULL);
 			}
 			rb_erase(&request->signaling.node, &b->signals);
 			spin_unlock_irq(&b->lock);
 
 			i915_gem_request_put(request);
 		} else {
-			if (kthread_should_stop())
+			if (kthread_should_stop()) {
+				GEM_BUG_ON(request);
 				break;
+			}
 
 			schedule();
 
 			if (kthread_should_park())
 				kthread_parkme();
 		}
+		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -575,7 +583,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	rb_link_node(&request->signaling.node, parent, p);
 	rb_insert_color(&request->signaling.node, &b->signals);
 	if (first)
-		smp_store_mb(b->first_signal, request);
+		rcu_assign_pointer(b->first_signal, request);
 
 	spin_unlock(&b->lock);
 
@@ -649,7 +657,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	/* The engines should be idle and all requests accounted for! */
 	WARN_ON(READ_ONCE(b->first_wait));
 	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(READ_ONCE(b->first_signal));
+	WARN_ON(rcu_access_pointer(b->first_signal));
 	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
 
 	if (!IS_ERR_OR_NULL(b->signaler))
@@ -665,8 +673,8 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&b->lock);
 
-	if (b->first_wait) {
-		wake_up_process(b->first_wait->tsk);
+	if (rcu_access_pointer(b->first_signal)) {
+		wake_up_process(b->signaler);
 		busy |= intel_engine_flag(engine);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9412c8f58619..63d4d203654c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -242,7 +242,7 @@ struct intel_engine_cs {
 		struct rb_root signals; /* sorted by retirement */
 		struct intel_wait *first_wait; /* oldest waiter by retirement */
 		struct task_struct *signaler; /* used for fence signalling */
-		struct drm_i915_gem_request *first_signal;
+		struct drm_i915_gem_request __rcu *first_signal;
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-- 
2.11.0


* [PATCH v2 08/14] drm/i915: Allow a request to be cancelled
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

If we preempt a request and remove it from the execution queue, we need
to undo its global seqno and restart any waiters.
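
In outline, cancellation undoes what __i915_gem_request_submit() did (a
sketch; the full locking lives in the intel_engine_cancel_signaling()
added below and in the unsubmit path added by the next patch):

        spin_lock(&request->lock);
        request->global_seqno = 0; /* no longer on the hw queue */
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
                intel_engine_cancel_signaling(request); /* kick waiters */
        spin_unlock(&request->lock);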

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 71 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 1445a8ff2e02..420bbaf1f1c0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -333,22 +333,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b,
 		return tsk->prio;
 }
 
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-			      struct intel_wait *wait)
+static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
+				       struct intel_wait *wait)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	/* Quick check to see if this waiter was already decoupled from
-	 * the tree by the bottom-half to avoid contention on the spinlock
-	 * by the herd.
-	 */
-	if (RB_EMPTY_NODE(&wait->node))
-		return;
-
-	spin_lock_irq(&b->lock);
+	assert_spin_locked(&b->lock);
 
 	if (RB_EMPTY_NODE(&wait->node))
-		goto out_unlock;
+		goto out;
 
 	if (b->first_wait == wait) {
 		const int priority = wakeup_priority(b, wait->tsk);
@@ -414,11 +407,27 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
 	rb_erase(&wait->node, &b->waiters);
 
-out_unlock:
+out:
 	GEM_BUG_ON(b->first_wait == wait);
 	GEM_BUG_ON(rb_first(&b->waiters) !=
 		   (b->first_wait ? &b->first_wait->node : NULL));
 	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
+}
+
+void intel_engine_remove_wait(struct intel_engine_cs *engine,
+			      struct intel_wait *wait)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	/* Quick check to see if this waiter was already decoupled from
+	 * the tree by the bottom-half to avoid contention on the spinlock
+	 * by the herd.
+	 */
+	if (RB_EMPTY_NODE(&wait->node))
+		return;
+
+	spin_lock_irq(&b->lock);
+	__intel_engine_remove_wait(engine, wait);
 	spin_unlock_irq(&b->lock);
 }
 
@@ -484,11 +493,13 @@ static int intel_breadcrumbs_signaler(void *arg)
 			dma_fence_signal(&request->fence);
 			local_bh_enable(); /* kick start the tasklets */
 
+			spin_lock_irq(&b->lock);
+
 			/* Wake up all other completed waiters and select the
 			 * next bottom-half for the next user interrupt.
 			 */
-			intel_engine_remove_wait(engine,
-						 &request->signaling.wait);
+			__intel_engine_remove_wait(engine,
+						   &request->signaling.wait);
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
@@ -496,7 +507,6 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 * we just completed - so double check we are still
 			 * the oldest before picking the next one.
 			 */
-			spin_lock_irq(&b->lock);
 			if (request == rcu_access_pointer(b->first_signal)) {
 				struct rb_node *rb =
 					rb_next(&request->signaling.node);
@@ -504,6 +514,8 @@ static int intel_breadcrumbs_signaler(void *arg)
 						   rb ? to_signaler(rb) : NULL);
 			}
 			rb_erase(&request->signaling.node, &b->signals);
+			RB_CLEAR_NODE(&request->signaling.node);
+
 			spin_unlock_irq(&b->lock);
 
 			i915_gem_request_put(request);
@@ -591,6 +603,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 		wake_up_process(b->signaler);
 }
 
+void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	assert_spin_locked(&request->lock);
+	GEM_BUG_ON(!request->signaling.wait.seqno);
+
+	spin_lock(&b->lock);
+
+	if (!RB_EMPTY_NODE(&request->signaling.node)) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb =
+				rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+		rb_erase(&request->signaling.node, &b->signals);
+		RB_CLEAR_NODE(&request->signaling.node);
+	}
+
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	spin_unlock(&b->lock);
+
+	request->signaling.wait.seqno = 0;
+	i915_gem_request_put(request);
+}
+
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 63d4d203654c..dfb2ffe4f1d1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -605,6 +605,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
 void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
+void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
 {
-- 
2.11.0


* [PATCH v2 09/14] drm/i915: Remove the preempted request from the execution queue
From: Chris Wilson @ 2017-02-14  9:54 UTC
  To: intel-gfx

After the request is cancelled, we then need to remove it from the
global execution timeline and return it to the context timeline, the
inverse of submit_request().
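
Side by side, the two directions (a summary of the code below and of
__i915_gem_request_submit() from patch 01, not new behaviour):

        submit:   request->global_seqno = ++engine->timeline->seqno;
                  list_move_tail(&request->link, &engine->timeline->requests);

        unsubmit: engine->timeline->seqno--;
                  request->global_seqno = 0;
                  list_move(&request->link, &request->timeline->requests);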

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c  | 58 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h  |  3 ++
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 +++++++++--
 3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5db4fd1eabcd..56ebb653406d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -435,6 +435,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_timeline *timeline;
+
+	assert_spin_locked(&engine->timeline->lock);
+
+	/* Only unwind in reverse order, required so that the per-context list
+	 * is kept in seqno/ring order.
+	 */
+	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+	engine->timeline->seqno--;
+
+	/* We may be recursing from the signal callback of another i915 fence */
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	request->global_seqno = 0;
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
+	spin_unlock(&request->lock);
+
+	/* Transfer back from the global per-engine timeline to per-context */
+	timeline = request->timeline;
+	GEM_BUG_ON(timeline == engine->timeline);
+
+	spin_lock(&timeline->lock);
+	list_move(&request->link, &timeline->requests);
+	spin_unlock(&timeline->lock);
+
+	/* We don't need to wake_up any waiters on request->execute, they
+	 * will get woken by any other event or us re-adding this request
+	 * to the engine timeline (__i915_gem_request_submit()). The waiters
+	 * should be quite adept at finding that the request now has a
+	 * different global_seqno from the one they went to sleep on.
+	 */
+}
+
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+
+	__i915_gem_request_unsubmit(request);
+
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -1016,9 +1065,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+	wait.tsk = current;
 
+restart:
 	reset_wait_queue(&req->execute, &exec);
+	wait.seqno = i915_gem_request_global_seqno(req);
 	if (!wait.seqno) {
 		do {
 			set_current_state(state);
@@ -1117,6 +1168,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
+
+		if (i915_gem_request_global_seqno(req) != wait.seqno) {
+			intel_engine_remove_wait(req->engine, &wait);
+			goto restart;
+		}
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index b81f6709905c..5f73d8c0a38a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 void __i915_gem_request_submit(struct drm_i915_gem_request *request);
 void i915_gem_request_submit(struct drm_i915_gem_request *request);
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 420bbaf1f1c0..39a0f87a32d0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -431,7 +431,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->lock);
 }
 
-static bool signal_complete(struct drm_i915_gem_request *request)
+static bool signal_valid(const struct drm_i915_gem_request *request)
+{
+	u32 seqno = READ_ONCE(request->global_seqno);
+
+	return seqno == request->signaling.wait.seqno;
+}
+
+static bool signal_complete(const struct drm_i915_gem_request *request)
 {
 	if (!request)
 		return false;
@@ -440,7 +447,7 @@ static bool signal_complete(struct drm_i915_gem_request *request)
 	 * signalled that this wait is already completed.
 	 */
 	if (intel_wait_complete(&request->signaling.wait))
-		return true;
+		return signal_valid(request);
 
 	/* Carefully check if the request is complete, giving time for the
 	 * seqno to be visible or if the GPU hung.
@@ -520,13 +527,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 
 			i915_gem_request_put(request);
 		} else {
+			DEFINE_WAIT(exec);
+
 			if (kthread_should_stop()) {
 				GEM_BUG_ON(request);
 				break;
 			}
 
+			if (request)
+				add_wait_queue(&request->execute, &exec);
+
 			schedule();
 
+			if (request)
+				remove_wait_queue(&request->execute, &exec);
+
 			if (kthread_should_park())
 				kthread_parkme();
 		}
-- 
2.11.0

* [PATCH v2 10/14] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
                   ` (8 preceding siblings ...)
  2017-02-14  9:54 ` [PATCH v2 09/14] drm/i915: Remove the preempted request from the execution queue Chris Wilson
@ 2017-02-14  9:54 ` Chris Wilson
  2017-02-14  9:54 ` [PATCH v2 11/14] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-14  9:54 UTC (permalink / raw)
  To: intel-gfx

Add a mock selftest to preempt a request and check that we cancel it,
requeue the request and then complete its execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/i915_gem_request.c | 59 +++++++++++++++++++++++
 drivers/gpu/drm/i915/selftests/mock_request.c     | 19 ++++++++
 drivers/gpu/drm/i915/selftests/mock_request.h     |  2 +
 3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 9d056a86723d..c803ef60a127 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -26,6 +26,7 @@
 
 #include "../i915_selftest.h"
 
+#include "mock_context.h"
 #include "mock_gem_device.h"
 
 static int igt_add_request(void *arg)
@@ -181,12 +182,70 @@ static int igt_fence_wait(void *arg)
 	return err;
 }
 
+static int igt_request_rewind(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_request *request, *vip;
+	struct i915_gem_context *ctx[2];
+	int err = -EINVAL;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx[0] = mock_context(i915, "A");
+	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
+	if (!request) {
+		err = -ENOMEM;
+		goto err_device;
+	}
+	i915_add_request(request);
+
+	ctx[1] = mock_context(i915, "B");
+	vip = mock_request(i915->engine[RCS], ctx[1], 0);
+	if (!vip) {
+		err = -ENOMEM;
+		goto err_locked;
+	}
+
+	/* Simulate preemption by manual reordering */
+	if (!mock_cancel_request(request)) {
+		pr_err("failed to cancel request (already executed)!\n");
+		i915_add_request(vip);
+		goto err_locked;
+	}
+	i915_add_request(vip);
+	request->engine->submit_request(request);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (i915_wait_request(vip, 0, HZ) == -ETIME) {
+		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
+		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
+		goto err;
+	}
+
+	if (i915_gem_request_completed(request)) {
+		pr_err("low priority request already completed\n");
+		goto err;
+	}
+
+	err = 0;
+err:
+	mutex_lock(&i915->drm.struct_mutex);
+err_locked:
+	mock_context_close(ctx[1]);
+	mock_context_close(ctx[0]);
+err_device:
+	mock_device_flush(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int i915_gem_request_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_add_request),
 		SUBTEST(igt_wait_request),
 		SUBTEST(igt_fence_wait),
+		SUBTEST(igt_request_rewind),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index e23242d1b88a..0e8d2e7f8c70 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include "mock_engine.h"
 #include "mock_request.h"
 
 struct drm_i915_gem_request *
@@ -42,3 +43,21 @@ mock_request(struct intel_engine_cs *engine,
 
 	return &mock->base;
 }
+
+bool mock_cancel_request(struct drm_i915_gem_request *request)
+{
+	struct mock_request *mock = container_of(request, typeof(*mock), base);
+	struct mock_engine *engine =
+		container_of(request->engine, typeof(*engine), base);
+	bool was_queued;
+
+	spin_lock_irq(&engine->hw_lock);
+	was_queued = !list_empty(&mock->link);
+	list_del_init(&mock->link);
+	spin_unlock_irq(&engine->hw_lock);
+
+	if (was_queued)
+		i915_gem_request_unsubmit(request);
+
+	return was_queued;
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
index cc76d4f4eb4e..4dea74c8e96d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.h
+++ b/drivers/gpu/drm/i915/selftests/mock_request.h
@@ -41,4 +41,6 @@ mock_request(struct intel_engine_cs *engine,
 	     struct i915_gem_context *context,
 	     unsigned long delay);
 
+bool mock_cancel_request(struct drm_i915_gem_request *request);
+
 #endif /* !__MOCK_REQUEST__ */
-- 
2.11.0

* [PATCH v2 11/14] drm/i915: Replace reset_wait_queue with default_wake_function
  2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
                   ` (9 preceding siblings ...)
  2017-02-14  9:54 ` [PATCH v2 10/14] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
@ 2017-02-14  9:54 ` Chris Wilson
  2017-02-14  9:54 ` [PATCH v2 12/14] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-14  9:54 UTC (permalink / raw)
  To: intel-gfx

If we change the wait_queue_t from using autoremove_wake_function to
default_wake_function, we no longer have to restore the wait_queue_t
entry on the wait_queue_head_t list after being woken up, as, unusually,
we sleep multiple times on the same wait_queue_t.
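
For reference, the stock helper behaves roughly as below (shape taken
from kernel/sched/wait.c around v4.10, quoted for illustration):

	int autoremove_wake_function(wait_queue_t *wait, unsigned mode,
				     int sync, void *key)
	{
		int ret = default_wake_function(wait, mode, sync, key);

		if (ret)
			list_del_init(&wait->task_list);
		return ret;
	}

default_wake_function() is the same wake-up minus the list_del_init(),
which is what lets the entry stay queued across multiple sleeps.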

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 56ebb653406d..bd921c4b0100 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -938,16 +938,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
-static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
 static unsigned long local_clock_us(unsigned int *cpu)
 {
 	unsigned long t;
@@ -1042,8 +1032,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
-	DEFINE_WAIT(reset);
-	DEFINE_WAIT(exec);
+	DEFINE_WAIT_FUNC(reset, default_wake_function);
+	DEFINE_WAIT_FUNC(exec, default_wake_function);
 	struct intel_wait wait;
 
 	might_sleep();
@@ -1062,13 +1052,13 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req);
 
+	add_wait_queue(&req->execute, &exec);
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
 	wait.tsk = current;
 
 restart:
-	reset_wait_queue(&req->execute, &exec);
 	wait.seqno = i915_gem_request_global_seqno(req);
 	if (!wait.seqno) {
 		do {
@@ -1082,26 +1072,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 			    i915_reset_in_progress(&req->i915->gpu_error)) {
 				__set_current_state(TASK_RUNNING);
 				i915_reset(req->i915);
-				reset_wait_queue(errq, &reset);
 				continue;
 			}
 
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
-				break;
+				goto complete;
 			}
 
 			if (!timeout) {
 				timeout = -ETIME;
-				break;
+				goto complete;
 			}
 
 			timeout = io_schedule_timeout(timeout);
 		} while (1);
-		finish_wait(&req->execute, &exec);
-
-		if (timeout < 0)
-			goto complete;
 
 		GEM_BUG_ON(!wait.seqno);
 	}
@@ -1161,7 +1146,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
 			i915_reset(req->i915);
-			reset_wait_queue(errq, &reset);
 			continue;
 		}
 
@@ -1176,11 +1160,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
-	__set_current_state(TASK_RUNNING);
-
 complete:
+	__set_current_state(TASK_RUNNING);
 	if (flags & I915_WAIT_LOCKED)
 		remove_wait_queue(errq, &reset);
+	remove_wait_queue(&req->execute, &exec);
 	trace_i915_gem_request_wait_end(req);
 
 	return timeout;
-- 
2.11.0

* [PATCH v2 12/14] drm/i915: Refactor direct GPU reset from request waiters
  2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
                   ` (10 preceding siblings ...)
  2017-02-14  9:54 ` [PATCH v2 11/14] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
@ 2017-02-14  9:54 ` Chris Wilson
  2017-02-14  9:54 ` [PATCH v2 13/14] drm/i915: Immediately process a reset before starting waiting Chris Wilson
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-14  9:54 UTC (permalink / raw)
  To: intel-gfx

Combine the common code for the pair of waiters into a single function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bd921c4b0100..6f7beaeda3ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1006,6 +1006,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
+static bool __i915_reset_request(struct drm_i915_gem_request *request)
+{
+	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
+		return false;
+
+	__set_current_state(TASK_RUNNING);
+	i915_reset(request->i915);
+	return true;
+}
+
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -1069,11 +1079,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
-			    i915_reset_in_progress(&req->i915->gpu_error)) {
-				__set_current_state(TASK_RUNNING);
-				i915_reset(req->i915);
+			    __i915_reset_request(req))
 				continue;
-			}
 
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
@@ -1142,12 +1149,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		 * to come along and update the breadcrumb (either directly
 		 * itself, or indirectly by recovering the GPU).
 		 */
-		if (flags & I915_WAIT_LOCKED &&
-		    i915_reset_in_progress(&req->i915->gpu_error)) {
-			__set_current_state(TASK_RUNNING);
-			i915_reset(req->i915);
+		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
 			continue;
-		}
 
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
-- 
2.11.0

* [PATCH v2 13/14] drm/i915: Immediately process a reset before starting waiting
  2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
                   ` (11 preceding siblings ...)
  2017-02-14  9:54 ` [PATCH v2 12/14] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
@ 2017-02-14  9:54 ` Chris Wilson
  2017-02-14  9:54 ` [PATCH v2 14/14] drm/i915: Remove one level of indentation from wait-for-execute Chris Wilson
  2017-02-14 11:52 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/14] drm/i915: Keep a global seqno per-engine Patchwork
  14 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-14  9:54 UTC (permalink / raw)
  To: intel-gfx

As we hand off the GPU reset to the waiter, we need to check that we
don't miss a wakeup that was sent before we started the wait.
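
The generic shape of the guard, sketched with illustrative names (not
taken from the patch):

	add_wait_queue(q, &wait);	/* subscribe first ...             */
	if (reset_already_signalled())	/* ... then re-check, so a wakeup  */
		handle_reset();		/* issued before we queued cannot  */
					/* be lost before we schedule()    */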

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6f7beaeda3ff..06af48c28045 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1063,8 +1063,10 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	trace_i915_gem_request_wait_begin(req);
 
 	add_wait_queue(&req->execute, &exec);
-	if (flags & I915_WAIT_LOCKED)
+	if (flags & I915_WAIT_LOCKED) {
 		add_wait_queue(errq, &reset);
+		__i915_reset_request(req);
+	}
 
 	wait.tsk = current;
 
@@ -1104,7 +1106,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	if (intel_engine_add_wait(req->engine, &wait))
+	if (intel_engine_add_wait(req->engine, &wait) ||
+	    flags & I915_WAIT_LOCKED)
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
 		 * coherent check on the seqno before we sleep.
-- 
2.11.0

* [PATCH v2 14/14] drm/i915: Remove one level of indentation from wait-for-execute
  2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
                   ` (12 preceding siblings ...)
  2017-02-14  9:54 ` [PATCH v2 13/14] drm/i915: Immediately process a reset before starting waiting Chris Wilson
@ 2017-02-14  9:54 ` Chris Wilson
  2017-02-14 11:52 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/14] drm/i915: Keep a global seqno per-engine Patchwork
  14 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-14  9:54 UTC (permalink / raw)
  To: intel-gfx

Now that the code is getting simpler, we can reduce the indentation when
waiting for the global_seqno.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 41 +++++++++++++++------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 06af48c28045..fa98bbf35337 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1071,34 +1071,29 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	wait.tsk = current;
 
 restart:
-	wait.seqno = i915_gem_request_global_seqno(req);
-	if (!wait.seqno) {
-		do {
-			set_current_state(state);
-
-			wait.seqno = i915_gem_request_global_seqno(req);
-			if (wait.seqno)
-				break;
+	do {
+		set_current_state(state);
+		wait.seqno = i915_gem_request_global_seqno(req);
+		if (wait.seqno)
+			break;
 
-			if (flags & I915_WAIT_LOCKED &&
-			    __i915_reset_request(req))
-				continue;
+		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
+			continue;
 
-			if (signal_pending_state(state, current)) {
-				timeout = -ERESTARTSYS;
-				goto complete;
-			}
+		if (signal_pending_state(state, current)) {
+			timeout = -ERESTARTSYS;
+			goto complete;
+		}
 
-			if (!timeout) {
-				timeout = -ETIME;
-				goto complete;
-			}
+		if (!timeout) {
+			timeout = -ETIME;
+			goto complete;
+		}
 
-			timeout = io_schedule_timeout(timeout);
-		} while (1);
+		timeout = io_schedule_timeout(timeout);
+	} while (1);
 
-		GEM_BUG_ON(!wait.seqno);
-	}
+	GEM_BUG_ON(!wait.seqno);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
 	/* Optimistic short spin before touching IRQs */
-- 
2.11.0

* ✓ Fi.CI.BAT: success for series starting with [v2,01/14] drm/i915: Keep a global seqno per-engine
  2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
                   ` (13 preceding siblings ...)
  2017-02-14  9:54 ` [PATCH v2 14/14] drm/i915: Remove one level of indentation from wait-for-execute Chris Wilson
@ 2017-02-14 11:52 ` Patchwork
  14 siblings, 0 replies; 30+ messages in thread
From: Patchwork @ 2017-02-14 11:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,01/14] drm/i915: Keep a global seqno per-engine
URL   : https://patchwork.freedesktop.org/series/19606/
State : success

== Summary ==

Series 19606v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19606/revisions/1/mbox/

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:202  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

37f3b76e282e6d7fdd7d6c85d782298af1a0793b drm-tip: 2017y-02m-14d-09h-39m-22s UTC integration manifest
f0a2c23 drm/i915: Remove one level of indentation from wait-for-execute
e720eef drm/i915: Immediately process a reset before starting waiting
137924f drm/i915: Refactor direct GPU reset from request waiters
0bd2a57 drm/i915: Replace reset_wait_queue with default_wake_function
471a9ad drm/i915: Exercise request cancellation using a mock selftest
aa78163 drm/i915: Remove the preempted request from the execution queue
5249a21 drm/i915: Allow a request to be cancelled
54a3c93 drm/i915: Take a reference whilst processing the signaler request
463d92f drm/i915: Protect the request->global_seqno with the engine->timeline lock
56fccb6 drm/i915: Deconstruct execute fence
fc61d11 drm/i915: Inline __i915_gem_request_wait_for_execute()
9bf754a drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
15ec9ca drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
8d4b430 drm/i915: Keep a global seqno per-engine

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3798/

* Re: [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-14  9:54 ` [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
@ 2017-02-15 17:05   ` Tvrtko Ursulin
  2017-02-15 21:49     ` Chris Wilson
  0 siblings, 1 reply; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-15 17:05 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/02/2017 09:54, Chris Wilson wrote:
> Replace the global device seqno with one for each engine, and account
> for in-flight seqno on each separately. This is consistent with
> dma-fence as each timeline has separate fence-contexts for each engine
> and a seqno is only ordered within a fence-context (i.e.  seqno do not
> need to be ordered wrt to other engines, just ordered within a single
> engine). This is required to enable request rewinding for preemption on
> individual engines (we have to rewind the global seqno to avoid
> overflow, and we do not have to rewind all engines just to preempt one.)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
>  drivers/gpu/drm/i915/i915_gem_request.c  | 68 +++++++++++++++-----------------
>  drivers/gpu/drm/i915/i915_gem_request.h  |  8 +---
>  drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +-
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 +++++++---------
>  drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
>  7 files changed, 52 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cda957c674ee..9b636962cab6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1080,10 +1080,7 @@ static const struct file_operations i915_error_state_fops = {
>  static int
>  i915_next_seqno_get(void *data, u64 *val)
>  {
> -	struct drm_i915_private *dev_priv = data;
> -
> -	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
> -	return 0;
> +	return -ENODEV;

I assume the reason for leaving this function in this state appears in
a later patch? Does gt.global_timeline stay around for something else?

>  }
>
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 24571a5289a4..2d84da0e2b39 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -213,7 +213,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
>  	GEM_BUG_ON(!i915_gem_request_completed(request));
> +
>  	GEM_BUG_ON(!request->i915->gt.active_requests);
> +	GEM_BUG_ON(!request->engine->timeline->active_seqno);
>
>  	trace_i915_gem_request_retire(request);
>
> @@ -237,6 +239,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  				 &request->i915->gt.idle_work,
>  				 msecs_to_jiffies(100));
>  	}
> +	request->engine->timeline->active_seqno--;

Hm, decrementing the seqno, a new concept in our codebase. There is no 
comment where the structure field is added so I guess it's reverse 
engineering time again. :(

Back a few minutes later - so this is a count of in-flight requests?
Should you call it active_seqnos then, so it becomes obvious it is not a
seqno as we know it?

It sucks a bit that the decrement is in the infamous retire worker. I
wonder how hard it would be to DoS the seqno space with it.

Protected by the timeline spinlock?

Would the design work if we assumed the GuC scheduler backend is in,
and put the in-flight seqno accounting close to where requests enter and
leave the hardware?

>
>  	/* Walk through the active list, calling retire on each. This allows
>  	 * objects to track their GPU activity and mark themselves as idle
> @@ -325,15 +328,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
>  	GEM_BUG_ON(i915->gt.active_requests > 1);
>
>  	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> -	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
> -		while (intel_breadcrumbs_busy(i915))
> -			cond_resched(); /* spin until threads are complete */
> -	}
> -	atomic_set(&timeline->seqno, seqno);
> +	for_each_engine(engine, i915, id) {
> +		struct intel_timeline *tl = &timeline->engine[id];
>
> -	/* Finally reset hw state */
> -	for_each_engine(engine, i915, id)
> +		if (!i915_seqno_passed(seqno, tl->seqno)) {
> +			/* spin until threads are complete */
> +			while (intel_breadcrumbs_busy(engine))
> +				cond_resched();
> +		}
> +
> +		/* Finally reset hw state */
> +		tl->seqno = seqno;
>  		intel_engine_init_global_seqno(engine, seqno);
> +	}

Came back here a bit later. Shouldn't you just handle one engine in this 
function if seqnos are per-engine now?

>
>  	list_for_each_entry(timeline, &i915->gt.timelines, link) {
>  		for_each_engine(engine, i915, id) {
> @@ -361,34 +368,28 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
>  	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
>  }
>
> -static int reserve_global_seqno(struct drm_i915_private *i915)
> +static int reserve_global_seqno(struct intel_engine_cs *engine)

Rename to reserve_engine_seqno?

>  {
> -	u32 active_requests = ++i915->gt.active_requests;
> -	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> +	u32 active = ++engine->timeline->active_seqno;
> +	u32 seqno = engine->timeline->seqno;
>  	int ret;
>
>  	/* Reservation is fine until we need to wrap around */
> -	if (likely(seqno + active_requests > seqno))
> +	if (likely(seqno + active > seqno))

Case for one of the overflows type helpers?

>  		return 0;
>
> -	ret = i915_gem_init_global_seqno(i915, 0);
> +	ret = i915_gem_init_global_seqno(engine->i915, 0);

i915_gem_init_engine_seqno(engine) ?

>  	if (ret) {
> -		i915->gt.active_requests--;
> +		engine->timeline->active_seqno--;

It would be better for the active seqno count to be managed on the same 
level for readability. By that I mean having the decrement in 
add_request where it was incremented.

>  		return ret;
>  	}
>
>  	return 0;
>  }
>
> -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
> +static u32 timeline_get_seqno(struct intel_timeline *tl)
>  {
> -	/* seqno only incremented under a mutex */
> -	return ++tl->seqno.counter;
> -}
> -
> -static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
> -{
> -	return atomic_inc_return(&tl->seqno);
> +	return ++tl->seqno;
>  }
>
>  void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> @@ -402,14 +403,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
>  	GEM_BUG_ON(timeline == request->timeline);
>  	assert_spin_locked(&timeline->lock);
>
> -	seqno = timeline_get_seqno(timeline->common);
> +	seqno = timeline_get_seqno(timeline);
>  	GEM_BUG_ON(!seqno);
>  	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
>
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
> -	request->previous_seqno = timeline->last_submitted_seqno;
> -	timeline->last_submitted_seqno = seqno;
> -
>  	/* We may be recursing from the signal callback of another i915 fence */
>  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>  	request->global_seqno = seqno;

This field could also be renamed to engine_seqno to be more 
self-documenting.

> @@ -514,7 +511,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	if (ret)
>  		return ERR_PTR(ret);
>
> -	ret = reserve_global_seqno(dev_priv);
> +	ret = reserve_global_seqno(engine);
>  	if (ret)
>  		goto err_unpin;
>
> @@ -566,7 +563,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  		       &i915_fence_ops,
>  		       &req->lock,
>  		       req->timeline->fence_context,
> -		       __timeline_get_seqno(req->timeline->common));
> +		       timeline_get_seqno(req->timeline));
>
>  	/* We bump the ref for the fence chain */
>  	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
> @@ -611,6 +608,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	 */
>  	req->head = req->ring->tail;
>
> +	/* Check that we didn't interrupt ourselves with a new request */
> +	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
>  	return req;
>
>  err_ctx:
> @@ -621,7 +620,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
>  	kmem_cache_free(dev_priv->requests, req);
>  err_unreserve:
> -	dev_priv->gt.active_requests--;
> +	engine->timeline->active_seqno--;
>  err_unpin:
>  	engine->context_unpin(engine, ctx);
>  	return ERR_PTR(ret);
> @@ -833,8 +832,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	 * our i915_gem_request_alloc() and called __i915_add_request() before
>  	 * us, the timeline will hold its seqno which is later than ours.
>  	 */
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> -				     request->fence.seqno));
> +	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
>
>  	/*
>  	 * To ensure that this call will not fail, space for its emissions
> @@ -889,16 +887,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	list_add_tail(&request->link, &timeline->requests);
>  	spin_unlock_irq(&timeline->lock);
>
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> -				     request->fence.seqno));
> -
> -	timeline->last_submitted_seqno = request->fence.seqno;
> +	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
>  	i915_gem_active_set(&timeline->last_request, request);
>
>  	list_add_tail(&request->ring_link, &ring->request_list);
>  	request->emitted_jiffies = jiffies;
>
> -	i915_gem_mark_busy(engine);
> +	if (!request->i915->gt.active_requests++)
> +		i915_gem_mark_busy(engine);
>
>  	/* Let the backend know a new request has arrived that may need
>  	 * to adjust the existing execution schedule due to a high priority
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index ea511f06efaf..9049936c571c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -145,12 +145,6 @@ struct drm_i915_gem_request {
>
>  	u32 global_seqno;
>
> -	/** GEM sequence number associated with the previous request,
> -	 * when the HWS breadcrumb is equal to this the GPU is processing
> -	 * this request.
> -	 */

Aha removing comments is fine! ...

> -	u32 previous_seqno;
> -
>  	/** Position in the ring of the start of the request */
>  	u32 head;
>
> @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
>  {
>  	GEM_BUG_ON(!req->global_seqno);
>  	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->previous_seqno);
> +				 req->global_seqno - 1);
>  }
>
>  static inline bool
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
> index f2e51f42cc2f..bc101e644143 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
> @@ -33,7 +33,8 @@ struct i915_gem_timeline;
>
>  struct intel_timeline {
>  	u64 fence_context;
> -	u32 last_submitted_seqno;
> +	u32 seqno;
> +	u32 active_seqno;

... But adding some back not so much? >:I

>
>  	spinlock_t lock;
>
> @@ -56,7 +57,6 @@ struct intel_timeline {
>
>  struct i915_gem_timeline {
>  	struct list_head link;
> -	atomic_t seqno;
>
>  	struct drm_i915_private *i915;
>  	const char *name;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 74cb7b91b5db..4f859e423ef3 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -655,31 +655,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>  	cancel_fake_irq(engine);
>  }
>
> -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
> +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
>  {
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	unsigned int mask = 0;
> -
> -	for_each_engine(engine, i915, id) {
> -		struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> -		spin_lock_irq(&b->lock);
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	bool busy = false;
>
> -		if (b->first_wait) {
> -			wake_up_process(b->first_wait->tsk);
> -			mask |= intel_engine_flag(engine);
> -		}
> +	spin_lock_irq(&b->lock);
>
> -		if (b->first_signal) {
> -			wake_up_process(b->signaler);
> -			mask |= intel_engine_flag(engine);
> -		}
> +	if (b->first_wait) {
> +		wake_up_process(b->first_wait->tsk);
> +		busy |= intel_engine_flag(engine);
> +	}
>
> -		spin_unlock_irq(&b->lock);
> +	if (b->first_signal) {
> +		wake_up_process(b->signaler);
> +		busy |= intel_engine_flag(engine);
>  	}
>
> -	return mask;
> +	spin_unlock_irq(&b->lock);
> +
> +	return busy;
>  }
>
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 538d845d7251..1798b87fd934 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -254,8 +254,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
>  		engine->irq_seqno_barrier(engine);
>
>  	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
> -	engine->timeline->last_submitted_seqno = seqno;
> -
>  	engine->hangcheck.seqno = seqno;
>
>  	/* After manually advancing the seqno, fake the interrupt in case
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index cc62e89010d3..9412c8f58619 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -564,7 +564,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
>  	 * wtih serialising this hint with anything, so document it as
>  	 * a hint and nothing more.
>  	 */
> -	return READ_ONCE(engine->timeline->last_submitted_seqno);
> +	return READ_ONCE(engine->timeline->seqno);
>  }
>
>  int init_workarounds_ring(struct intel_engine_cs *engine);
> @@ -637,6 +637,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
>
>  void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
>  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
> -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
> +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
>
>  #endif /* _INTEL_RINGBUFFER_H_ */
>

Regards,

Tvrtko

* Re: [PATCH v2 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
  2017-02-14  9:54 ` [PATCH v2 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
@ 2017-02-15 17:06   ` Tvrtko Ursulin
  0 siblings, 0 replies; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-15 17:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/02/2017 09:54, Chris Wilson wrote:
> Use a local variable to avoid having to type out the full name of the
> gpu_error wait_queue.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 2d84da0e2b39..f7ff3736797f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1061,6 +1061,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  {
>  	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
>  		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
> +	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
>  	DEFINE_WAIT(reset);
>  	struct intel_wait wait;
>
> @@ -1096,7 +1097,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  	set_current_state(state);
>  	if (flags & I915_WAIT_LOCKED)
> -		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> +		add_wait_queue(errq, &reset);
>
>  	intel_wait_init(&wait, req->global_seqno);
>  	if (intel_engine_add_wait(req->engine, &wait))
> @@ -1147,8 +1148,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		    i915_reset_in_progress(&req->i915->gpu_error)) {
>  			__set_current_state(TASK_RUNNING);
>  			i915_reset(req->i915);
> -			reset_wait_queue(&req->i915->gpu_error.wait_queue,
> -					 &reset);
> +			reset_wait_queue(errq, &reset);
>  			continue;
>  		}
>
> @@ -1159,7 +1159,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  	intel_engine_remove_wait(req->engine, &wait);
>  	if (flags & I915_WAIT_LOCKED)
> -		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> +		remove_wait_queue(errq, &reset);
>  	__set_current_state(TASK_RUNNING);
>
>  complete:
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH v2 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
  2017-02-14  9:54 ` [PATCH v2 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
@ 2017-02-15 17:11   ` Tvrtko Ursulin
  0 siblings, 0 replies; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-15 17:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/02/2017 09:54, Chris Wilson wrote:
> Add ourselves to the gpu error waitqueue earlier on, even before we
> determine we have to wait on the seqno. This is so that we can then
> share the waitqueue between stages in subsequent patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index f7ff3736797f..dcc0a7ab95dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1081,6 +1081,9 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  	trace_i915_gem_request_wait_begin(req);
>
> +	if (flags & I915_WAIT_LOCKED)
> +		add_wait_queue(errq, &reset);
> +
>  	if (!i915_sw_fence_done(&req->execute)) {
>  		timeout = __i915_request_wait_for_execute(req, flags, timeout);
>  		if (timeout < 0)
> @@ -1096,9 +1099,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		goto complete;
>
>  	set_current_state(state);
> -	if (flags & I915_WAIT_LOCKED)
> -		add_wait_queue(errq, &reset);
> -
>  	intel_wait_init(&wait, req->global_seqno);
>  	if (intel_engine_add_wait(req->engine, &wait))
>  		/* In order to check that we haven't missed the interrupt
> @@ -1158,11 +1158,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	}
>
>  	intel_engine_remove_wait(req->engine, &wait);
> -	if (flags & I915_WAIT_LOCKED)
> -		remove_wait_queue(errq, &reset);
>  	__set_current_state(TASK_RUNNING);
>
>  complete:
> +	if (flags & I915_WAIT_LOCKED)
> +		remove_wait_queue(errq, &reset);
>  	trace_i915_gem_request_wait_end(req);
>
>  	return timeout;
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-15 17:05   ` Tvrtko Ursulin
@ 2017-02-15 21:49     ` Chris Wilson
  2017-02-15 22:20       ` Chris Wilson
                         ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-15 21:49 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Feb 15, 2017 at 05:05:40PM +0000, Tvrtko Ursulin wrote:
> 
> On 14/02/2017 09:54, Chris Wilson wrote:
> >Replace the global device seqno with one for each engine, and account
> >for in-flight seqno on each separately. This is consistent with
> >dma-fence as each timeline has separate fence-contexts for each engine
> >and a seqno is only ordered within a fence-context (i.e.  seqno do not
> >need to be ordered wrt to other engines, just ordered within a single
> >engine). This is required to enable request rewinding for preemption on
> >individual engines (we have to rewind the global seqno to avoid
> >overflow, and we do not have to rewind all engines just to preempt one.)
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> > drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
> > drivers/gpu/drm/i915/i915_gem_request.c  | 68 +++++++++++++++-----------------
> > drivers/gpu/drm/i915/i915_gem_request.h  |  8 +---
> > drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +-
> > drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 +++++++---------
> > drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
> > drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
> > 7 files changed, 52 insertions(+), 72 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >index cda957c674ee..9b636962cab6 100644
> >--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >@@ -1080,10 +1080,7 @@ static const struct file_operations i915_error_state_fops = {
> > static int
> > i915_next_seqno_get(void *data, u64 *val)
> > {
> >-	struct drm_i915_private *dev_priv = data;
> >-
> >-	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
> >-	return 0;
> >+	return -ENODEV;
> 
> I assume the reason for leaving this function in this state appears
> in a later patch? Does gt.global_timeline stay around for something
> else?

There's no longer a single global seqno, so we tell userspace (igt) it can't
have it.

> > }
> >
> > static int
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> >index 24571a5289a4..2d84da0e2b39 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.c
> >@@ -213,7 +213,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> > 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
> > 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
> > 	GEM_BUG_ON(!i915_gem_request_completed(request));
> >+
> > 	GEM_BUG_ON(!request->i915->gt.active_requests);
> >+	GEM_BUG_ON(!request->engine->timeline->active_seqno);
> >
> > 	trace_i915_gem_request_retire(request);
> >
> >@@ -237,6 +239,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> > 				 &request->i915->gt.idle_work,
> > 				 msecs_to_jiffies(100));
> > 	}
> >+	request->engine->timeline->active_seqno--;
> 
> Hm, decrementing the seqno, a new concept in our codebase. There is
> no comment where the structure field is added so I guess it's
> reverse engineering time again. :(

I thought the switch from active_requests to active_seqno would be
apparent.

> Back a few minutes later - so this is a count of in-flight requests?
> Should you call it active_seqnos then, so it becomes obvious it is
> not a seqno as we know it?

Yes, Joonas suggested active_seqnos as well. I was trying to restrict
myself to only talking about seqno wrt timelines.

> It sucks a bit that the decrement is in the infamous retire worker.
> I wonder how hard it would be to DoS the seqno space with it.

Harder now than the active_request space, i.e. virtually impossible. We
can only have 2^20 contexts and each context can only support around
2^10 requests (at least userspace requests; kernel requests can be more
compact, but still larger than 16 bytes!).
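
As a back-of-the-envelope sketch, that bounds us to roughly 2^20 * 2^10
= 2^30 in-flight seqnos, comfortably inside the 2^31 ordering window of
the wrap-safe comparison (quoted here for reference):

	static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
	{
		return (s32)(seq1 - seq2) >= 0;
	}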
 
> Protected by the timeline spinlock?

struct_mutex. (Still thinking about what mutex it will be in the
future.)
 
> Would the design work if we assumed the GuC scheduler backend is in,
> and put the in-flight seqno accounting close to where requests enter
> and leave the hardware?

No, it is currently designed around being a coarse counter integrating
into the runtime pm framework as well as the mempools.

> > 	/* Walk through the active list, calling retire on each. This allows
> > 	 * objects to track their GPU activity and mark themselves as idle
> >@@ -325,15 +328,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
> > 	GEM_BUG_ON(i915->gt.active_requests > 1);
> >
> > 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> >-	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
> >-		while (intel_breadcrumbs_busy(i915))
> >-			cond_resched(); /* spin until threads are complete */
> >-	}
> >-	atomic_set(&timeline->seqno, seqno);
> >+	for_each_engine(engine, i915, id) {
> >+		struct intel_timeline *tl = &timeline->engine[id];
> >
> >-	/* Finally reset hw state */
> >-	for_each_engine(engine, i915, id)
> >+		if (!i915_seqno_passed(seqno, tl->seqno)) {
> >+			/* spin until threads are complete */
> >+			while (intel_breadcrumbs_busy(engine))
> >+				cond_resched();
> >+		}
> >+
> >+		/* Finally reset hw state */
> >+		tl->seqno = seqno;
> > 		intel_engine_init_global_seqno(engine, seqno);
> >+	}
> 
> Came back here a bit later. Shouldn't you just handle one engine in
> this function if seqnos are per-engine now?

No. We still have multiple engines listening to the seqno of others
(legacy semaphores). So if we wrap around on RCS we have to idle xCS to
be sure they complete any semaphores (semaphores check for >= value, so
if we set future requests to be smaller, they have to wait for a long
time before a new RCS request overtakes the semaphore value).
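
Illustratively, the waiting ring behaves as if it were executing the
loop below (a sketch of the semantics with made-up names, not the
actual MI_SEMAPHORE emission):

	/* gen6-style semaphore: poll the signaller's breadcrumb */
	while (!i915_seqno_passed(READ_ONCE(*signaller_hws_seqno),
				  awaited_seqno))
		; /* the ring, not a CPU, stalls here */

so lowering the signaller's seqno beneath an in-flight semaphore value
leaves the waiter stalled until the seqno climbs past it again.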

> > 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
> > 		for_each_engine(engine, i915, id) {
> >@@ -361,34 +368,28 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
> > 	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
> > }
> >
> >-static int reserve_global_seqno(struct drm_i915_private *i915)
> >+static int reserve_global_seqno(struct intel_engine_cs *engine)
> 
> Rename to reserve_engine_seqno?

Yes.

> >-	u32 active_requests = ++i915->gt.active_requests;
> >-	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> >+	u32 active = ++engine->timeline->active_seqno;
> >+	u32 seqno = engine->timeline->seqno;
> > 	int ret;
> >
> > 	/* Reservation is fine until we need to wrap around */
> >-	if (likely(seqno + active_requests > seqno))
> >+	if (likely(seqno + active > seqno))
> 
> Case for one of the overflows type helpers?

add_overflows

if (likely(!add_overflows(seqno, active)))
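
where the helper could be as simple as this sketch (the eventual
i915_utils.h version may differ):

	static inline bool add_overflows(u32 a, u32 b)
	{
		return a + b < a;
	}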

> 
> > 		return 0;
> >
> >-	ret = i915_gem_init_global_seqno(i915, 0);
> >+	ret = i915_gem_init_global_seqno(engine->i915, 0);
> 
> i915_gem_init_engine_seqno(engine) ?

No. Still retains global scope, and definitely wants to keep that
nuisance front and centre.

> > 	if (ret) {
> >-		i915->gt.active_requests--;
> >+		engine->timeline->active_seqno--;
> 
> It would be better for the active seqno count to be managed on the
> same level for readability. By that I mean having the decrement in
> add_request where it was incremented.

It's incremented in this function, so the unwind on error is here as
well.

> > 		return ret;
> > 	}
> >
> > 	return 0;
> > }
> >
> >-static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
> >+static u32 timeline_get_seqno(struct intel_timeline *tl)
> > {
> >-	/* seqno only incremented under a mutex */
> >-	return ++tl->seqno.counter;
> >-}
> >-
> >-static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
> >-{
> >-	return atomic_inc_return(&tl->seqno);
> >+	return ++tl->seqno;
> > }
> >
> > void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> >@@ -402,14 +403,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> > 	GEM_BUG_ON(timeline == request->timeline);
> > 	assert_spin_locked(&timeline->lock);
> >
> >-	seqno = timeline_get_seqno(timeline->common);
> >+	seqno = timeline_get_seqno(timeline);
> > 	GEM_BUG_ON(!seqno);
> > 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
> >
> >-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
> >-	request->previous_seqno = timeline->last_submitted_seqno;
> >-	timeline->last_submitted_seqno = seqno;
> >-
> > 	/* We may be recursing from the signal callback of another i915 fence */
> > 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> > 	request->global_seqno = seqno;
> 
> This field could also be renamed to engine_seqno to be more
> self-documenting.

That's going to be a wide-sweeping change; let's see what it looks
like, e.g. i915_gem_request_get_engine_seqno()

On the other hand, I thought I called the timeline "[global]"
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-15 21:49     ` Chris Wilson
@ 2017-02-15 22:20       ` Chris Wilson
  2017-02-15 22:36       ` Chris Wilson
  2017-02-16  8:10       ` Tvrtko Ursulin
  2 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-15 22:20 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On Wed, Feb 15, 2017 at 09:49:41PM +0000, Chris Wilson wrote:
> > Protected by the timeline spinlock?
> 
> struct_mutex. (Still thinking about what mutex it will be in the
> future.)

Hmm, I was thinking it had already replaced active_requests. However,
it is still tied to request allocation/deallocation, so at the moment it
is very much limited to struct_mutex paths.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-15 21:49     ` Chris Wilson
  2017-02-15 22:20       ` Chris Wilson
@ 2017-02-15 22:36       ` Chris Wilson
  2017-02-16  8:10       ` Tvrtko Ursulin
  2 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-15 22:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On Wed, Feb 15, 2017 at 09:49:41PM +0000, Chris Wilson wrote:
> > >-static int reserve_global_seqno(struct drm_i915_private *i915)
> > >+static int reserve_global_seqno(struct intel_engine_cs *engine)
> > 
> > Rename to reserve_engine_seqno?
> 
> Yes.
> 
> > >-	u32 active_requests = ++i915->gt.active_requests;
> > >-	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> > >+	u32 active = ++engine->timeline->active_seqno;
> > >+	u32 seqno = engine->timeline->seqno;
> > > 	int ret;
> > >
> > > 	/* Reservation is fine until we need to wrap around */
> > >-	if (likely(seqno + active_requests > seqno))
> > >+	if (likely(seqno + active > seqno))
> > 
> > Case for one of the overflows type helpers?
> 
> add_overflows
> 
> if (likely(!add_overflows(seqno, active)))
> 
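A minimal sketch of such a helper, assuming plain u32 arithmetic (the
exact form that lands in the tree may differ):

	static inline bool add_overflows(u32 a, u32 b)
	{
		/* true if a + b wraps past the top of the u32 range */
		return a + b < a;
	}

This keeps the semantics of the likely(seqno + active > seqno) test
above, just spelled as a self-documenting helper.
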
> > 
> > > 		return 0;
> > >
> > >-	ret = i915_gem_init_global_seqno(i915, 0);
> > >+	ret = i915_gem_init_global_seqno(engine->i915, 0);
> > 
> > i915_gem_init_engine_seqno(engine) ?
> 
> No. Still retains global scope, and definitely wants to keep that
> nuisance front and centre.
> 
> > > 	if (ret) {
> > >-		i915->gt.active_requests--;
> > >+		engine->timeline->active_seqno--;
> > 
> > It would be better for the active seqno count to be managed on the
> > same level for readability. By that I mean having the decrement in
> > add_request where it was incremented.
> 
> It's incremented in this function, so the unwind on error is here as
> well.

Ah, I guess you were referring to the decrement in request_alloc. Pulled
that out to unreserve_seqno() to match the call to reserve_seqno().
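
For reference, the pairing would look something like this (a sketch
only, assuming the per-engine active_seqno counter from the diff; the
body in the final revision may differ):

	static void unreserve_seqno(struct intel_engine_cs *engine)
	{
		GEM_BUG_ON(!engine->timeline->active_seqno);
		engine->timeline->active_seqno--;
	}

so that every reserve_seqno() taken during request allocation has a
matching unreserve_seqno() rather than an open-coded decrement.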
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-15 21:49     ` Chris Wilson
  2017-02-15 22:20       ` Chris Wilson
  2017-02-15 22:36       ` Chris Wilson
@ 2017-02-16  8:10       ` Tvrtko Ursulin
  2017-02-16  8:28         ` Chris Wilson
  2 siblings, 1 reply; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-16  8:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 15/02/2017 21:49, Chris Wilson wrote:
> On Wed, Feb 15, 2017 at 05:05:40PM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/02/2017 09:54, Chris Wilson wrote:
>>> Replace the global device seqno with one for each engine, and account
>>> for in-flight seqno on each separately. This is consistent with
>>> dma-fence as each timeline has separate fence-contexts for each engine
>>> and a seqno is only ordered within a fence-context (i.e.  seqno do not
>>> need to be ordered wrt to other engines, just ordered within a single
>>> engine). This is required to enable request rewinding for preemption on
>>> individual engines (we have to rewind the global seqno to avoid
>>> overflow, and we do not have to rewind all engines just to preempt one.)
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>> drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
>>> drivers/gpu/drm/i915/i915_gem_request.c  | 68 +++++++++++++++-----------------
>>> drivers/gpu/drm/i915/i915_gem_request.h  |  8 +---
>>> drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +-
>>> drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 +++++++---------
>>> drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
>>> drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
>>> 7 files changed, 52 insertions(+), 72 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index cda957c674ee..9b636962cab6 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -1080,10 +1080,7 @@ static const struct file_operations i915_error_state_fops = {
>>> static int
>>> i915_next_seqno_get(void *data, u64 *val)
>>> {
>>> -	struct drm_i915_private *dev_priv = data;
>>> -
>>> -	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
>>> -	return 0;
>>> +	return -ENODEV;
>>
>> I assume reason for leaving this function in this state appears in a
>> later patch? gt.global_timeline stays around for something else?
>
> There's no longer a single global seqno, so we tell userspace (igt) it can't
> have it.

I missed that this is in debugfs and that we even have this facility.
Does the exact errno matter here? I am thinking of just dropping the
vfunc entirely and letting the core return an error. After looking it
up, it seems it would be -EACCES.
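
Something like the following, assuming the file keeps using
DEFINE_SIMPLE_ATTRIBUTE and the existing i915_next_seqno_set()
(simple_attr_read() returns -EACCES when no getter is supplied):

	DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
				NULL, /* no getter: reads fail with -EACCES */
				i915_next_seqno_set,
				"0x%llx\n");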

>>> @@ -325,15 +328,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
>>> 	GEM_BUG_ON(i915->gt.active_requests > 1);
>>>
>>> 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
>>> -	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
>>> -		while (intel_breadcrumbs_busy(i915))
>>> -			cond_resched(); /* spin until threads are complete */
>>> -	}
>>> -	atomic_set(&timeline->seqno, seqno);
>>> +	for_each_engine(engine, i915, id) {
>>> +		struct intel_timeline *tl = &timeline->engine[id];
>>>
>>> -	/* Finally reset hw state */
>>> -	for_each_engine(engine, i915, id)
>>> +		if (!i915_seqno_passed(seqno, tl->seqno)) {
>>> +			/* spin until threads are complete */
>>> +			while (intel_breadcrumbs_busy(engine))
>>> +				cond_resched();
>>> +		}
>>> +
>>> +		/* Finally reset hw state */
>>> +		tl->seqno = seqno;
>>> 		intel_engine_init_global_seqno(engine, seqno);
>>> +	}
>>
>> Came back here a bit later. Shouldn't you just handle one engine in
>> this function if seqnos are per-engine now?
>
> No. We still have multiple engines listening to the seqno of others
> (legacy semaphores). So if we wrap around on RCS we have to idle xCS to
> be sure they complete any semaphores (semaphores check for >= value, so
> if we set future requests to be smaller, they have to wait for a long
> time before a new RCS request overtakes the semaphore value).

Ah right, forgot about semaphores.
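
To make the hazard concrete, a toy model of the semaphore check
(illustrative only; the comparison is performed by the command
streamer, not by driver code):

	/* The waiting engine polls the signalling engine's seqno. */
	static bool semaphore_passed(u32 signaller_seqno, u32 awaited)
	{
		return signaller_seqno >= awaited; /* no wrap handling */
	}

So if xCS is waiting on RCS seqno 1000 when RCS is rewound to 0, the
check stays false until roughly 1000 new RCS requests complete, which
is why all engines are idled before any one seqno wraps.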

>>> 	/* We may be recursing from the signal callback of another i915 fence */
>>> 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>>> 	request->global_seqno = seqno;
>>
>> This field could also be renamed to engine_seqno to be more
>> self-documenting.
>
> That's going to be a wide-sweeping change; let's see what it looks like,
> e.g. i915_gem_request_get_engine_seqno().
>
> On the other hand, I thought I called the timeline "[global]".

Okay, if it is too much churn, never mind then. I guess we can think of
req->global_seqno as global to the engine ourselves.

 >>> It would be better for the active seqno count to be managed on the
 >>> same level for readability. By that I mean having the decrement in
 >>> add_request where it was incremented.
 >>
 >> It's incremented in this function, so the unwind on error is here as
 >> well.
 >
 > Ah, I guess you were referring to the decrement in request_alloc. Pulled
 > that out to unreserve_seqno() to match the call to reserve_seqno().

Yes, I said the wrong thing. Got confused by jumping back and forth.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-16  8:10       ` Tvrtko Ursulin
@ 2017-02-16  8:28         ` Chris Wilson
  0 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-16  8:28 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Feb 16, 2017 at 08:10:07AM +0000, Tvrtko Ursulin wrote:
> 
> On 15/02/2017 21:49, Chris Wilson wrote:
> >On Wed, Feb 15, 2017 at 05:05:40PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 14/02/2017 09:54, Chris Wilson wrote:
> >>>Replace the global device seqno with one for each engine, and account
> >>>for in-flight seqno on each separately. This is consistent with
> >>>dma-fence as each timeline has separate fence-contexts for each engine
> >>>and a seqno is only ordered within a fence-context (i.e.  seqno do not
> >>>need to be ordered wrt to other engines, just ordered within a single
> >>>engine). This is required to enable request rewinding for preemption on
> >>>individual engines (we have to rewind the global seqno to avoid
> >>>overflow, and we do not have to rewind all engines just to preempt one.)
> >>>
> >>>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>---
> >>>drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
> >>>drivers/gpu/drm/i915/i915_gem_request.c  | 68 +++++++++++++++-----------------
> >>>drivers/gpu/drm/i915/i915_gem_request.h  |  8 +---
> >>>drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +-
> >>>drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 +++++++---------
> >>>drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
> >>>drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
> >>>7 files changed, 52 insertions(+), 72 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >>>index cda957c674ee..9b636962cab6 100644
> >>>--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >>>+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >>>@@ -1080,10 +1080,7 @@ static const struct file_operations i915_error_state_fops = {
> >>>static int
> >>>i915_next_seqno_get(void *data, u64 *val)
> >>>{
> >>>-	struct drm_i915_private *dev_priv = data;
> >>>-
> >>>-	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
> >>>-	return 0;
> >>>+	return -ENODEV;
> >>
> >>I assume reason for leaving this function in this state appears in a
> >>later patch? gt.global_timeline stays around for something else?
> >
> >There's no longer a single global seqno, so we tell userspace (igt) it can't
> >have it.
> 
> I missed that this is in debugfs and that we even have this facility.
> Does the exact errno matter here? I am thinking of just dropping the
> vfunc entirely and letting the core return an error. After looking it
> up, it seems it would be -EACCES.

The errno doesn't matter. ENODEV is my dirty habit.

It's used by just one test, gem_seqno_wrap, which sets a value and then
expects the read to match after a batch. I incorporated the
seqno-wrapping check into gem_exec_whisper, which checks the
synchronisation and ordering of requests between engines across the
wrap. It has been very useful for detecting bugs.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute()
  2017-02-14  9:54 ` [PATCH v2 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
@ 2017-02-17 14:04   ` Tvrtko Ursulin
  0 siblings, 0 replies; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 14:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/02/2017 09:54, Chris Wilson wrote:
> It had only one callsite and existed to keep the code clearer. Now
> having shared the wait-on-error between phases and with plans to change
> the wait-for-execute in the next few patches, remove the out of line
> wait loop and move it into the main body of i915_wait_request.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 78 ++++++++++++---------------------
>  1 file changed, 29 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index dcc0a7ab95dc..001fc9fedf49 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -988,54 +988,6 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
>  	return false;
>  }
>
> -static long
> -__i915_request_wait_for_execute(struct drm_i915_gem_request *request,
> -				unsigned int flags,
> -				long timeout)
> -{
> -	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
> -		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
> -	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
> -	DEFINE_WAIT(reset);
> -	DEFINE_WAIT(wait);
> -
> -	if (flags & I915_WAIT_LOCKED)
> -		add_wait_queue(q, &reset);
> -
> -	do {
> -		prepare_to_wait(&request->execute.wait, &wait, state);
> -
> -		if (i915_sw_fence_done(&request->execute))
> -			break;
> -
> -		if (flags & I915_WAIT_LOCKED &&
> -		    i915_reset_in_progress(&request->i915->gpu_error)) {
> -			__set_current_state(TASK_RUNNING);
> -			i915_reset(request->i915);
> -			reset_wait_queue(q, &reset);
> -			continue;
> -		}
> -
> -		if (signal_pending_state(state, current)) {
> -			timeout = -ERESTARTSYS;
> -			break;
> -		}
> -
> -		if (!timeout) {
> -			timeout = -ETIME;
> -			break;
> -		}
> -
> -		timeout = io_schedule_timeout(timeout);
> -	} while (1);
> -	finish_wait(&request->execute.wait, &wait);
> -
> -	if (flags & I915_WAIT_LOCKED)
> -		remove_wait_queue(q, &reset);
> -
> -	return timeout;
> -}
> -
>  /**
>   * i915_wait_request - wait until execution of request has finished
>   * @req: the request to wait upon
> @@ -1085,7 +1037,35 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		add_wait_queue(errq, &reset);
>
>  	if (!i915_sw_fence_done(&req->execute)) {
> -		timeout = __i915_request_wait_for_execute(req, flags, timeout);
> +		DEFINE_WAIT(exec);
> +
> +		do {
> +			prepare_to_wait(&req->execute.wait, &exec, state);
> +			if (i915_sw_fence_done(&req->execute))
> +				break;
> +
> +			if (flags & I915_WAIT_LOCKED &&
> +			    i915_reset_in_progress(&req->i915->gpu_error)) {
> +				__set_current_state(TASK_RUNNING);
> +				i915_reset(req->i915);
> +				reset_wait_queue(errq, &reset);
> +				continue;
> +			}
> +
> +			if (signal_pending_state(state, current)) {
> +				timeout = -ERESTARTSYS;
> +				break;
> +			}
> +
> +			if (!timeout) {
> +				timeout = -ETIME;
> +				break;
> +			}
> +
> +			timeout = io_schedule_timeout(timeout);
> +		} while (1);
> +		finish_wait(&req->execute.wait, &exec);
> +
>  		if (timeout < 0)
>  			goto complete;
>
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 05/14] drm/i915: Deconstruct execute fence
  2017-02-14  9:54 ` [PATCH v2 05/14] drm/i915: Deconstruct execute fence Chris Wilson
@ 2017-02-17 14:26   ` Tvrtko Ursulin
  2017-02-17 14:41     ` Chris Wilson
  0 siblings, 1 reply; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 14:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/02/2017 09:54, Chris Wilson wrote:
> On reflection, we are only using the execute fence as a waitqueue on the
> global_seqno and not using it for dependency tracking between fences
> (unlike the submit and dma fences). By only treating it as a waitqueue,
> we can then treat it similar to the other waitqueues durin submit,

during

> making the code simpler.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 47 +++++++--------------------------
>  drivers/gpu/drm/i915/i915_gem_request.h | 10 +------
>  2 files changed, 11 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 001fc9fedf49..bb59acaa8a34 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence)
>  	 * caught trying to reuse dead objects.
>  	 */
>  	i915_sw_fence_fini(&req->submit);
> -	i915_sw_fence_fini(&req->execute);
>
>  	kmem_cache_free(req->i915->requests, req);
>  }
> @@ -211,7 +210,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>
>  	lockdep_assert_held(&request->i915->drm.struct_mutex);
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
> -	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
>  	GEM_BUG_ON(!i915_gem_request_completed(request));
>
>  	GEM_BUG_ON(!request->i915->gt.active_requests);
> @@ -422,7 +420,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
>  	list_move_tail(&request->link, &timeline->requests);
>  	spin_unlock(&request->timeline->lock);
>
> -	i915_sw_fence_commit(&request->execute);
> +	wake_up_all(&request->execute);
>  }
>
>  void i915_gem_request_submit(struct drm_i915_gem_request *request)
> @@ -457,24 +455,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>  	return NOTIFY_DONE;
>  }
>
> -static int __i915_sw_fence_call
> -execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> -{
> -	struct drm_i915_gem_request *request =
> -		container_of(fence, typeof(*request), execute);
> -
> -	switch (state) {
> -	case FENCE_COMPLETE:
> -		break;
> -
> -	case FENCE_FREE:
> -		i915_gem_request_put(request);
> -		break;
> -	}
> -
> -	return NOTIFY_DONE;
> -}
> -
>  /**
>   * i915_gem_request_alloc - allocate a request structure
>   *
> @@ -567,13 +547,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
>  	/* We bump the ref for the fence chain */
>  	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
> -	i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);
> -
> -	/* Ensure that the execute fence completes after the submit fence -
> -	 * as we complete the execute fence from within the submit fence
> -	 * callback, its completion would otherwise be visible first.
> -	 */
> -	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
> +	init_waitqueue_head(&req->execute);
>
>  	i915_priotree_init(&req->priotree);
>
> @@ -1015,6 +989,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
>  	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
>  	DEFINE_WAIT(reset);
> +	DEFINE_WAIT(exec);
>  	struct intel_wait wait;
>
>  	might_sleep();
> @@ -1036,12 +1011,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	if (flags & I915_WAIT_LOCKED)
>  		add_wait_queue(errq, &reset);
>
> -	if (!i915_sw_fence_done(&req->execute)) {
> -		DEFINE_WAIT(exec);
> -
> +	reset_wait_queue(&req->execute, &exec);
> +	if (!req->global_seqno) {
>  		do {
> -			prepare_to_wait(&req->execute.wait, &exec, state);

Somehow I missed the moment when reset_wait_queue was introduced. But
why can't you just use prepare_to_wait here?

Otherwise looks OK.

Regards,

Tvrtko

> -			if (i915_sw_fence_done(&req->execute))
> +			set_current_state(state);
> +			if (req->global_seqno)
>  				break;
>
>  			if (flags & I915_WAIT_LOCKED &&
> @@ -1064,15 +1038,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  			timeout = io_schedule_timeout(timeout);
>  		} while (1);
> -		finish_wait(&req->execute.wait, &exec);
> +		finish_wait(&req->execute, &exec);
>
>  		if (timeout < 0)
>  			goto complete;
>
> -		GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
> +		GEM_BUG_ON(!req->global_seqno);
>  	}
> -	GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
> -	GEM_BUG_ON(!req->global_seqno);
> +	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
>
>  	/* Optimistic short spin before touching IRQs */
>  	if (i915_spin_request(req, state, 5))
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 9049936c571c..467d3e13fce0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -119,18 +119,10 @@ struct drm_i915_gem_request {
>  	 * The submit fence is used to await upon all of the request's
>  	 * dependencies. When it is signaled, the request is ready to run.
>  	 * It is used by the driver to then queue the request for execution.
> -	 *
> -	 * The execute fence is used to signal when the request has been
> -	 * sent to hardware.
> -	 *
> -	 * It is illegal for the submit fence of one request to wait upon the
> -	 * execute fence of an earlier request. It should be sufficient to
> -	 * wait upon the submit fence of the earlier request.
>  	 */
>  	struct i915_sw_fence submit;
> -	struct i915_sw_fence execute;
>  	wait_queue_t submitq;
> -	wait_queue_t execq;
> +	wait_queue_head_t execute;
>
>  	/* A list of everyone we wait upon, and everyone who waits upon us.
>  	 * Even though we will not be submitted to the hardware before the
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 05/14] drm/i915: Deconstruct execute fence
  2017-02-17 14:26   ` Tvrtko Ursulin
@ 2017-02-17 14:41     ` Chris Wilson
  2017-02-17 14:55       ` Tvrtko Ursulin
  0 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2017-02-17 14:41 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 17, 2017 at 02:26:04PM +0000, Tvrtko Ursulin wrote:
> 
> On 14/02/2017 09:54, Chris Wilson wrote:
> >@@ -1036,12 +1011,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> > 	if (flags & I915_WAIT_LOCKED)
> > 		add_wait_queue(errq, &reset);
> >
> >-	if (!i915_sw_fence_done(&req->execute)) {
> >-		DEFINE_WAIT(exec);
> >-
> >+	reset_wait_queue(&req->execute, &exec);
> >+	if (!req->global_seqno) {
> > 		do {
> >-			prepare_to_wait(&req->execute.wait, &exec, state);
> 
> Somehow I missed the moment when reset_wait_queue was introduced.
> But why can't you just use prepare_to_wait here?

In a few patches, we start looping around at this point, so the reset is
required then (and choosing to do the reset now just avoids a few lines
later on). Then in a few more patches, reset_wait_queue is eliminated
entirely.
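
For reference, reset_wait_queue() at this point in the series is
essentially the following (a sketch against the waitqueue API of this
era, where the entry's list member is still named task_list):

	static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
	{
		unsigned long flags;

		spin_lock_irqsave(&q->lock, flags);
		if (list_empty(&wait->task_list))
			__add_wait_queue(q, wait);
		spin_unlock_irqrestore(&q->lock, flags);
	}

i.e. it re-arms the entry without touching the task state, leaving the
set_current_state() to the loop body in the diff.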
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-14  9:54 ` [PATCH v2 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
@ 2017-02-17 14:43   ` Tvrtko Ursulin
  2017-02-22 12:38     ` Chris Wilson
  0 siblings, 1 reply; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 14:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/02/2017 09:54, Chris Wilson wrote:
> A request is assigned a global seqno only when it is on the hardware
> execution queue. The global seqno can be used to maintain a list of
> requests on the same engine in retirement order, for example for
> constructing a priority queue for waiting. Prior to its execution, or
> if it is subsequently removed in the event of preemption, its global
> seqno is zero. As both insertion and removal from the execution queue
> may operate in IRQ context, it is not guarded by the usual struct_mutex
> BKL. Instead those relying on the global seqno must be prepared for its
> value to change between reads. Only when the request is complete can
> the global seqno be stable (due to the memory barriers on submitting
> the commands to the hardware to write the breadcrumb, if the HWS shows
> that it has passed the global seqno and the global seqno is unchanged
> after the read, it is indeed complete).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          | 16 ++++++--
>  drivers/gpu/drm/i915/i915_gem.c          | 16 +++++---
>  drivers/gpu/drm/i915/i915_gem_request.c  | 46 ++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_request.h  | 66 +++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
>  5 files changed, 114 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 251b2d66407e..cb66fc33cab6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4005,14 +4005,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>  }
>
>  static inline bool
> -__i915_request_irq_complete(struct drm_i915_gem_request *req)
> +__i915_request_irq_complete(const struct drm_i915_gem_request *req)
>  {
>  	struct intel_engine_cs *engine = req->engine;
> +	u32 seqno = i915_gem_request_global_seqno(req);
> +
> +	/* The request was dequeued before we were awoken. We check after
> +	 * inspecting the hw to confirm that this was the same request
> +	 * that generated the HWS update. The memory barriers within
> +	 * the request execution are sufficient to ensure that a check
> +	 * after reading the value from hw matches this request.
> +	 */
> +	if (!seqno)
> +		return false;
>
>  	/* Before we do the heavier coherent read of the seqno,
>  	 * check the value (hopefully) in the CPU cacheline.
>  	 */
> -	if (__i915_gem_request_completed(req))
> +	if (__i915_gem_request_completed(req, seqno))
>  		return true;
>
>  	/* Ensure our read of the seqno is coherent so that we
> @@ -4063,7 +4073,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
>  			wake_up_process(tsk);
>  		rcu_read_unlock();
>
> -		if (__i915_gem_request_completed(req))
> +		if (__i915_gem_request_completed(req, seqno))
>  			return true;
>  	}
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 766820ae9985..a7eea5ff44ca 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -400,7 +400,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>  	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
>  		i915_gem_request_retire_upto(rq);
>
> -	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
> +	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
>  		/* The GPU is now idle and this client has stalled.
>  		 * Since no other client has submitted a request in the
>  		 * meantime, assume that this client is the only one
> @@ -2612,7 +2612,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
>  struct drm_i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_request *request;
> +	struct drm_i915_gem_request *request, *active = NULL;
> +	unsigned long flags;
>
>  	/* We are called by the error capture and reset at a random
>  	 * point in time. In particular, note that neither is crucially
> @@ -2622,17 +2623,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>  	 * extra delay for a recent interrupt is pointless. Hence, we do
>  	 * not need an engine->irq_seqno_barrier() before the seqno reads.
>  	 */
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
>  	list_for_each_entry(request, &engine->timeline->requests, link) {
> -		if (__i915_gem_request_completed(request))
> +		if (__i915_gem_request_completed(request,
> +						 request->global_seqno))
>  			continue;
>
>  		GEM_BUG_ON(request->engine != engine);
>  		GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>  				    &request->fence.flags));
> -		return request;
> +
> +		active = request;
> +		break;
>  	}
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>
> -	return NULL;
> +	return active;
>  }
>
>  static bool engine_stalled(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index bb59acaa8a34..5db4fd1eabcd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -412,7 +412,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
>  		intel_engine_enable_signaling(request);
>  	spin_unlock(&request->lock);
>
> -	GEM_BUG_ON(!request->global_seqno);
>  	engine->emit_breadcrumb(request,
>  				request->ring->vaddr + request->postfix);
>
> @@ -498,7 +497,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	/* Move the oldest request to the slab-cache (if not in use!) */
>  	req = list_first_entry_or_null(&engine->timeline->requests,
>  				       typeof(*req), link);
> -	if (req && __i915_gem_request_completed(req))
> +	if (req && i915_gem_request_completed(req))
>  		i915_gem_request_retire(req);
>
>  	/* Beware: Dragons be flying overhead.
> @@ -604,6 +603,7 @@ static int
>  i915_gem_request_await_request(struct drm_i915_gem_request *to,
>  			       struct drm_i915_gem_request *from)
>  {
> +	u32 seqno;
>  	int ret;
>
>  	GEM_BUG_ON(to == from);
> @@ -626,14 +626,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>  		return ret < 0 ? ret : 0;
>  	}
>
> -	if (!from->global_seqno) {
> +	seqno = i915_gem_request_global_seqno(from);
> +	if (!seqno) {
>  		ret = i915_sw_fence_await_dma_fence(&to->submit,
>  						    &from->fence, 0,
>  						    GFP_KERNEL);
>  		return ret < 0 ? ret : 0;
>  	}
>
> -	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
> +	if (seqno <= to->timeline->sync_seqno[from->engine->id])
>  		return 0;
>
>  	trace_i915_gem_ring_sync_to(to, from);
> @@ -651,7 +652,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>  			return ret;
>  	}
>
> -	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
> +	to->timeline->sync_seqno[from->engine->id] = seqno;
>  	return 0;
>  }
>
> @@ -931,7 +932,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
>  }
>
>  bool __i915_spin_request(const struct drm_i915_gem_request *req,
> -			 int state, unsigned long timeout_us)
> +			 u32 seqno, int state, unsigned long timeout_us)
>  {
>  	unsigned int cpu;
>
> @@ -947,7 +948,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
>
>  	timeout_us += local_clock_us(&cpu);
>  	do {
> -		if (__i915_gem_request_completed(req))
> +		if (seqno != i915_gem_request_global_seqno(req))
> +			break;

You don't want to keep spinning for the allotted timeslice after the 
seqno transitions from zero to something and it is the currently 
executing seqno?

> +
> +		if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
> +				      seqno))
>  			return true;
>
>  		if (signal_pending_state(state, current))
> @@ -1011,11 +1016,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	if (flags & I915_WAIT_LOCKED)
>  		add_wait_queue(errq, &reset);
>
> +	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
> +
>  	reset_wait_queue(&req->execute, &exec);
> -	if (!req->global_seqno) {
> +	if (!wait.seqno) {
>  		do {
>  			set_current_state(state);
> -			if (req->global_seqno)
> +
> +			wait.seqno = i915_gem_request_global_seqno(req);
> +			if (wait.seqno)
>  				break;
>
>  			if (flags & I915_WAIT_LOCKED &&
> @@ -1043,7 +1052,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		if (timeout < 0)
>  			goto complete;
>
> -		GEM_BUG_ON(!req->global_seqno);
> +		GEM_BUG_ON(!wait.seqno);
>  	}
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
>
> @@ -1052,7 +1061,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		goto complete;
>
>  	set_current_state(state);
> -	intel_wait_init(&wait, req->global_seqno);
>  	if (intel_engine_add_wait(req->engine, &wait))
>  		/* In order to check that we haven't missed the interrupt
>  		 * as we enabled it, we need to kick ourselves to do a
> @@ -1073,7 +1081,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  		timeout = io_schedule_timeout(timeout);
>
> -		if (intel_wait_complete(&wait))
> +		if (intel_wait_complete(&wait) &&
> +		    i915_gem_request_global_seqno(req) == wait.seqno)
>  			break;

Hm, the second part of the conditional sounds like it is always true.
Since we know the seqno is not zero, given that the first part of the
wait has completed, it has to be the expected one at this point.
Otherwise this could be a GEM_BUG_ON that it is different. Or have I
missed something?

>
>  		set_current_state(state);
> @@ -1124,14 +1133,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  static void engine_retire_requests(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_gem_request *request, *next;
> +	u32 seqno = intel_engine_get_seqno(engine);
> +	LIST_HEAD(retire);
>
> +	spin_lock_irq(&engine->timeline->lock);
>  	list_for_each_entry_safe(request, next,
>  				 &engine->timeline->requests, link) {
> -		if (!__i915_gem_request_completed(request))
> -			return;
> +		if (!i915_seqno_passed(seqno, request->global_seqno))
> +			break;
>
> -		i915_gem_request_retire(request);
> +		list_move_tail(&request->link, &retire);
>  	}
> +	spin_unlock_irq(&engine->timeline->lock);
> +
> +	list_for_each_entry_safe(request, next, &retire, link)
> +		i915_gem_request_retire(request);
>  }
>
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 467d3e13fce0..b81f6709905c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -135,6 +135,11 @@ struct drm_i915_gem_request {
>  	struct i915_priotree priotree;
>  	struct i915_dependency dep;
>
> +	/** GEM sequence number associated with this request on the
> +	 * global execution timeline. It is zero when the request is not
> +	 * on the HW queue (i.e. not on the engine timeline list).
> +	 * Its value is guarded by the timeline spinlock.
> +	 */
>  	u32 global_seqno;
>
>  	/** Position in the ring of the start of the request */
> @@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
>  	*pdst = src;
>  }
>
> +/**
> + * i915_gem_request_global_seqno - report the current global seqno
> + * @request - the request
> + *
> + * A request is assigned a global seqno only when it is on the hardware
> + * execution queue. The global seqno can be used to maintain a list of
> + * requests on the same engine in retirement order, for example for
> + * constructing a priority queue for waiting. Prior to its execution, or
> + * if it is subsequently removed in the event of preemption, its global
> + * seqno is zero. As both insertion and removal from the execution queue
> + * may operate in IRQ context, it is not guarded by the usual struct_mutex
> + * BKL. Instead those relying on the global seqno must be prepared for its
> + * value to change between reads. Only when the request is complete can
> + * the global seqno be stable (due to the memory barriers on submitting
> + * the commands to the hardware to write the breadcrumb, if the HWS shows
> + * that it has passed the global seqno and the global seqno is unchanged
> + * after the read, it is indeed complete).
> + */
> +static u32
> +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
> +{
> +	return READ_ONCE(request->global_seqno);
> +}
> +
>  int
>  i915_gem_request_await_object(struct drm_i915_gem_request *to,
>  			      struct drm_i915_gem_object *obj,
> @@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
>  }
>
>  static inline bool
> -__i915_gem_request_started(const struct drm_i915_gem_request *req)
> +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
>  {
> -	GEM_BUG_ON(!req->global_seqno);
> +	GEM_BUG_ON(!seqno);
>  	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->global_seqno - 1);
> +				 seqno - 1);
>  }
>
>  static inline bool
>  i915_gem_request_started(const struct drm_i915_gem_request *req)
>  {
> -	if (!req->global_seqno)
> +	u32 seqno;
> +
> +	seqno = i915_gem_request_global_seqno(req);
> +	if (!seqno)
>  		return false;
>
> -	return __i915_gem_request_started(req);
> +	return __i915_gem_request_started(req, seqno);
>  }
>
>  static inline bool
> -__i915_gem_request_completed(const struct drm_i915_gem_request *req)
> +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
>  {
> -	GEM_BUG_ON(!req->global_seqno);
> -	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->global_seqno);
> +	GEM_BUG_ON(!seqno);
> +	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
> +		seqno == i915_gem_request_global_seqno(req);
>  }
>
>  static inline bool
>  i915_gem_request_completed(const struct drm_i915_gem_request *req)
>  {
> -	if (!req->global_seqno)
> +	u32 seqno;
> +
> +	seqno = i915_gem_request_global_seqno(req);
> +	if (!seqno)
>  		return false;
>
> -	return __i915_gem_request_completed(req);
> +	return __i915_gem_request_completed(req, seqno);
>  }
>
>  bool __i915_spin_request(const struct drm_i915_gem_request *request,
> -			 int state, unsigned long timeout_us);
> +			 u32 seqno, int state, unsigned long timeout_us);
>  static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
>  				     int state, unsigned long timeout_us)
>  {
> -	return (__i915_gem_request_started(request) &&
> -		__i915_spin_request(request, state, timeout_us));
> +	u32 seqno;
> +
> +	seqno = i915_gem_request_global_seqno(request);
> +	return (__i915_gem_request_started(request, seqno) &&
> +		__i915_spin_request(request, seqno, state, timeout_us));
>  }
>
>  /* We treat requests as fences. This is not be to confused with our
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 4f859e423ef3..1af7b4814bfb 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -523,6 +523,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  	struct rb_node *parent, **p;
>  	bool first, wakeup;
> +	u32 seqno;
>
>  	/* Note that we may be called from an interrupt handler on another
>  	 * device (e.g. nouveau signaling a fence completion causing us
> @@ -533,11 +534,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>
>  	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
>  	assert_spin_locked(&request->lock);
> -	if (!request->global_seqno)
> +
> +	seqno = i915_gem_request_global_seqno(request);
> +	if (!seqno)
>  		return;
>
>  	request->signaling.wait.tsk = b->signaler;
> -	request->signaling.wait.seqno = request->global_seqno;
> +	request->signaling.wait.seqno = seqno;
>  	i915_gem_request_get(request);
>
>  	spin_lock(&b->lock);
> @@ -561,8 +564,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  	p = &b->signals.rb_node;
>  	while (*p) {
>  		parent = *p;
> -		if (i915_seqno_passed(request->global_seqno,
> -				      to_signaler(parent)->global_seqno)) {
> +		if (i915_seqno_passed(seqno,
> +				      to_signaler(parent)->signaling.wait.seqno)) {
>  			p = &parent->rb_right;
>  			first = false;
>  		} else {
>

The rest looks OK. But let me have another pass at some point.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 05/14] drm/i915: Deconstruct execute fence
  2017-02-17 14:41     ` Chris Wilson
@ 2017-02-17 14:55       ` Tvrtko Ursulin
  0 siblings, 0 replies; 30+ messages in thread
From: Tvrtko Ursulin @ 2017-02-17 14:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 17/02/2017 14:41, Chris Wilson wrote:
> On Fri, Feb 17, 2017 at 02:26:04PM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/02/2017 09:54, Chris Wilson wrote:
>>> @@ -1036,12 +1011,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>>> 	if (flags & I915_WAIT_LOCKED)
>>> 		add_wait_queue(errq, &reset);
>>>
>>> -	if (!i915_sw_fence_done(&req->execute)) {
>>> -		DEFINE_WAIT(exec);
>>> -
>>> +	reset_wait_queue(&req->execute, &exec);
>>> +	if (!req->global_seqno) {
>>> 		do {
>>> -			prepare_to_wait(&req->execute.wait, &exec, state);
>>
>> Somehow I missed the moment when reset_wait_queue was introduced.
>> But why can't you just use prepare_to_wait here?
>
> In a few patches, we start looping around at this point, so the reset is
> required then (and choosing to do the reset now just avoids a few lines
> later on). Then in a few more patches, reset_wait_queue is eliminated
> entirely.

Fair enough;

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-17 14:43   ` Tvrtko Ursulin
@ 2017-02-22 12:38     ` Chris Wilson
  0 siblings, 0 replies; 30+ messages in thread
From: Chris Wilson @ 2017-02-22 12:38 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Feb 17, 2017 at 02:43:05PM +0000, Tvrtko Ursulin wrote:
> 
> On 14/02/2017 09:54, Chris Wilson wrote:
> >@@ -947,7 +948,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
> >
> > 	timeout_us += local_clock_us(&cpu);
> > 	do {
> >-		if (__i915_gem_request_completed(req))
> >+		if (seqno != i915_gem_request_global_seqno(req))
> >+			break;
> 
> You don't want to keep spinning for the allotted timeslice after the
> seqno transitions from zero to something and it is the currently
> executing seqno?

The intent was that we only spin for the active (on hw) request. If it
was removed from the execution queue, it is unlikely(?) to be put back
as the next request.

Yes, I'd love to spin out the timeslice; it used to give some nice boosts
to some synmarks that kept hitting readbacks. Hopefully, they've been
fixed now to use gpu pipelines!

> >@@ -1073,7 +1081,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> >
> > 		timeout = io_schedule_timeout(timeout);
> >
> >-		if (intel_wait_complete(&wait))
> >+		if (intel_wait_complete(&wait) &&
> >+		    i915_gem_request_global_seqno(req) == wait.seqno)
> > 			break;
> 
> Hm, the second part of the conditional sounds like it is always
> true. Since we know the seqno is not zero, given that the first part
> of the wait has completed, it has to be the expected one at this
> point. Otherwise this could be a GEM_BUG_ON that it is different. Or
> have I missed something?

Yes. We are preparing for global_seqno changing mid-wait due to request
reordering.
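
A sketch of the race that the extra check guards against (hypothetical
interleaving, seqno values invented for illustration):

	/*
	 * waiter:  wait.seqno = 5; sleeps until the engine reaches 5
	 * driver:  request is preempted; its global_seqno drops to 0
	 * driver:  a different request is submitted and assigned 5
	 * waiter:  wakes with intel_wait_complete() true, but on
	 *          behalf of somebody else's request
	 */
	if (intel_wait_complete(&wait) &&
	    i915_gem_request_global_seqno(req) == wait.seqno)
		break; /* ours, and still ours: safe to stop waiting */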
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2017-02-22 12:38 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-14  9:53 Prep work for preemption and GEM bugfixes Chris Wilson
2017-02-14  9:54 ` [PATCH v2 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
2017-02-15 17:05   ` Tvrtko Ursulin
2017-02-15 21:49     ` Chris Wilson
2017-02-15 22:20       ` Chris Wilson
2017-02-15 22:36       ` Chris Wilson
2017-02-16  8:10       ` Tvrtko Ursulin
2017-02-16  8:28         ` Chris Wilson
2017-02-14  9:54 ` [PATCH v2 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
2017-02-15 17:06   ` Tvrtko Ursulin
2017-02-14  9:54 ` [PATCH v2 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
2017-02-15 17:11   ` Tvrtko Ursulin
2017-02-14  9:54 ` [PATCH v2 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
2017-02-17 14:04   ` Tvrtko Ursulin
2017-02-14  9:54 ` [PATCH v2 05/14] drm/i915: Deconstruct execute fence Chris Wilson
2017-02-17 14:26   ` Tvrtko Ursulin
2017-02-17 14:41     ` Chris Wilson
2017-02-17 14:55       ` Tvrtko Ursulin
2017-02-14  9:54 ` [PATCH v2 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
2017-02-17 14:43   ` Tvrtko Ursulin
2017-02-22 12:38     ` Chris Wilson
2017-02-14  9:54 ` [PATCH v2 07/14] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
2017-02-14  9:54 ` [PATCH v2 08/14] drm/i915: Allow a request to be cancelled Chris Wilson
2017-02-14  9:54 ` [PATCH v2 09/14] drm/i915: Remove the preempted request from the execution queue Chris Wilson
2017-02-14  9:54 ` [PATCH v2 10/14] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
2017-02-14  9:54 ` [PATCH v2 11/14] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
2017-02-14  9:54 ` [PATCH v2 12/14] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
2017-02-14  9:54 ` [PATCH v2 13/14] drm/i915: Immediately process a reset before starting waiting Chris Wilson
2017-02-14  9:54 ` [PATCH v2 14/14] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
2017-02-14 11:52 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/14] drm/i915: Keep a global seqno per-engine Patchwork
