All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/14] drm/i915: Keep a global seqno per-engine
@ 2017-02-02 15:12 Chris Wilson
  2017-02-02 15:13 ` [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
                   ` (13 more replies)
  0 siblings, 14 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:12 UTC (permalink / raw)
  To: intel-gfx

Replace the global device seqno with one for each engine, and account
for in-flight seqno on each separately. This is consistent with
dma-fence as each timeline has separate fence-contexts for each engine
and a seqno is only ordered within a fence-context (i.e.  seqno do not
need to be ordered wrt to other engines, just ordered within a single
engine). This is required to enable request rewinding for preemption on
individual engines (we have to rewind the global seqno to avoid
overflow, and we do not have to rewind all engines just to preempt one.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
 drivers/gpu/drm/i915/i915_gem_request.c  | 68 +++++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_request.h  |  8 +---
 drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 +++++++---------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
 7 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3fc14820a151..4f4c3fd40b80 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1079,10 +1079,7 @@ static const struct file_operations i915_error_state_fops = {
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
-	struct drm_i915_private *dev_priv = data;
-
-	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
-	return 0;
+	return -ENODEV;
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bd2aeb290cad..d1ab4e3a8139 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -213,7 +213,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
+
 	GEM_BUG_ON(!request->i915->gt.active_requests);
+	GEM_BUG_ON(!request->engine->timeline->active_seqno);
 
 	trace_i915_gem_request_retire(request);
 
@@ -237,6 +239,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 				 &request->i915->gt.idle_work,
 				 msecs_to_jiffies(100));
 	}
+	request->engine->timeline->active_seqno--;
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -325,15 +328,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 	GEM_BUG_ON(i915->gt.active_requests > 1);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
-		while (intel_breadcrumbs_busy(i915))
-			cond_resched(); /* spin until threads are complete */
-	}
-	atomic_set(&timeline->seqno, seqno);
+	for_each_engine(engine, i915, id) {
+		struct intel_timeline *tl = &timeline->engine[id];
 
-	/* Finally reset hw state */
-	for_each_engine(engine, i915, id)
+		if (!i915_seqno_passed(seqno, tl->seqno)) {
+			/* spin until threads are complete */
+			while (intel_breadcrumbs_busy(engine))
+				cond_resched();
+		}
+
+		/* Finally reset hw state */
+		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
+	}
 
 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
 		for_each_engine(engine, i915, id) {
@@ -361,34 +368,28 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
 }
 
-static int reserve_global_seqno(struct drm_i915_private *i915)
+static int reserve_global_seqno(struct intel_engine_cs *engine)
 {
-	u32 active_requests = ++i915->gt.active_requests;
-	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
+	u32 active = ++engine->timeline->active_seqno;
+	u32 seqno = engine->timeline->seqno;
 	int ret;
 
 	/* Reservation is fine until we need to wrap around */
-	if (likely(seqno + active_requests > seqno))
+	if (likely(seqno + active > seqno))
 		return 0;
 
-	ret = i915_gem_init_global_seqno(i915, 0);
+	ret = i915_gem_init_global_seqno(engine->i915, 0);
 	if (ret) {
-		i915->gt.active_requests--;
+		engine->timeline->active_seqno--;
 		return ret;
 	}
 
 	return 0;
 }
 
-static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
+static u32 timeline_get_seqno(struct intel_timeline *tl)
 {
-	/* seqno only incremented under a mutex */
-	return ++tl->seqno.counter;
-}
-
-static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
-{
-	return atomic_inc_return(&tl->seqno);
+	return ++tl->seqno;
 }
 
 void __i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -402,14 +403,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(timeline == request->timeline);
 	assert_spin_locked(&timeline->lock);
 
-	seqno = timeline_get_seqno(timeline->common);
+	seqno = timeline_get_seqno(timeline);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
-	request->previous_seqno = timeline->last_submitted_seqno;
-	timeline->last_submitted_seqno = seqno;
-
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
@@ -514,7 +511,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = reserve_global_seqno(dev_priv);
+	ret = reserve_global_seqno(engine);
 	if (ret)
 		goto err_unpin;
 
@@ -566,7 +563,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       &i915_fence_ops,
 		       &req->lock,
 		       req->timeline->fence_context,
-		       __timeline_get_seqno(req->timeline->common));
+		       timeline_get_seqno(req->timeline));
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
@@ -611,6 +608,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	/* Check that we didn't interrupt ourselves with a new request */
+	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
 	return req;
 
 err_ctx:
@@ -621,7 +620,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	kmem_cache_free(dev_priv->requests, req);
 err_unreserve:
-	dev_priv->gt.active_requests--;
+	engine->timeline->active_seqno--;
 err_unpin:
 	engine->context_unpin(engine, ctx);
 	return ERR_PTR(ret);
@@ -833,8 +832,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * our i915_gem_request_alloc() and called __i915_add_request() before
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
+	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
@@ -889,16 +887,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
-
-	timeline->last_submitted_seqno = request->fence.seqno;
+	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	i915_gem_mark_busy(engine);
+	if (!request->i915->gt.active_requests++)
+		i915_gem_mark_busy(engine);
 
 	/* Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea511f06efaf..9049936c571c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -145,12 +145,6 @@ struct drm_i915_gem_request {
 
 	u32 global_seqno;
 
-	/** GEM sequence number associated with the previous request,
-	 * when the HWS breadcrumb is equal to this the GPU is processing
-	 * this request.
-	 */
-	u32 previous_seqno;
-
 	/** Position in the ring of the start of the request */
 	u32 head;
 
@@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
 	GEM_BUG_ON(!req->global_seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->previous_seqno);
+				 req->global_seqno - 1);
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index f2e51f42cc2f..bc101e644143 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -33,7 +33,8 @@ struct i915_gem_timeline;
 
 struct intel_timeline {
 	u64 fence_context;
-	u32 last_submitted_seqno;
+	u32 seqno;
+	u32 active_seqno;
 
 	spinlock_t lock;
 
@@ -56,7 +57,6 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
 	struct list_head link;
-	atomic_t seqno;
 
 	struct drm_i915_private *i915;
 	const char *name;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 1f36756f8759..6fc3ec9f639d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -652,31 +652,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	cancel_fake_irq(engine);
 }
 
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	unsigned int mask = 0;
-
-	for_each_engine(engine, i915, id) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		spin_lock_irq(&b->lock);
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	bool busy = false;
 
-		if (b->first_wait) {
-			wake_up_process(b->first_wait->tsk);
-			mask |= intel_engine_flag(engine);
-		}
+	spin_lock_irq(&b->lock);
 
-		if (b->first_signal) {
-			wake_up_process(b->signaler);
-			mask |= intel_engine_flag(engine);
-		}
+	if (b->first_wait) {
+		wake_up_process(b->first_wait->tsk);
+		busy |= intel_engine_flag(engine);
+	}
 
-		spin_unlock_irq(&b->lock);
+	if (b->first_signal) {
+		wake_up_process(b->signaler);
+		busy |= intel_engine_flag(engine);
 	}
 
-	return mask;
+	spin_unlock_irq(&b->lock);
+
+	return busy;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 538d845d7251..1798b87fd934 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -254,8 +254,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		engine->irq_seqno_barrier(engine);
 
 	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-	engine->timeline->last_submitted_seqno = seqno;
-
 	engine->hangcheck.seqno = seqno;
 
 	/* After manually advancing the seqno, fake the interrupt in case
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c2f0ecf612b9..9e7fdbb92231 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -560,7 +560,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
 	 * wtih serialising this hint with anything, so document it as
 	 * a hint and nothing more.
 	 */
-	return READ_ONCE(engine->timeline->last_submitted_seqno);
+	return READ_ONCE(engine->timeline->seqno);
 }
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
@@ -633,6 +633,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-14 11:17   ` Joonas Lahtinen
  2017-02-02 15:13 ` [PATCH 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

Use a local variable to avoid having to type out the full name of the
gpu_error wait_queue.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index d1ab4e3a8139..fc1e340aa1e1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1056,6 +1056,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 
@@ -1091,7 +1092,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	set_current_state(state);
 	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -1142,8 +1143,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
 			i915_reset(req->i915);
-			reset_wait_queue(&req->i915->gpu_error.wait_queue,
-					 &reset);
+			reset_wait_queue(errq, &reset);
 			continue;
 		}
 
@@ -1154,7 +1154,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	intel_engine_remove_wait(req->engine, &wait);
 	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+		remove_wait_queue(errq, &reset);
 	__set_current_state(TASK_RUNNING);
 
 complete:
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
  2017-02-02 15:13 ` [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-14 11:25   ` Joonas Lahtinen
  2017-02-02 15:13 ` [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

Add ourselves to the gpu error waitqueue earllier on, even before we
determine we have to wait on the seqno. This is so that we can then
share the waitqueue between stages in subsequent patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index fc1e340aa1e1..8d4f0c859fee 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1076,6 +1076,9 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req);
 
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(errq, &reset);
+
 	if (!i915_sw_fence_done(&req->execute)) {
 		timeout = __i915_request_wait_for_execute(req, flags, timeout);
 		if (timeout < 0)
@@ -1091,9 +1094,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(errq, &reset);
-
 	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
@@ -1153,11 +1153,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(errq, &reset);
 	__set_current_state(TASK_RUNNING);
 
 complete:
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(errq, &reset);
 	trace_i915_gem_request_wait_end(req);
 
 	return timeout;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute()
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
  2017-02-02 15:13 ` [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
  2017-02-02 15:13 ` [PATCH 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-14 11:32   ` Joonas Lahtinen
  2017-02-02 15:13 ` [PATCH 05/14] drm/i915: Deconstruct execute fence Chris Wilson
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

It had only one callsite and existed to keep the code clearer. Now
having shared the wait-on-error between phases and with plans to change
the wait-for-execute in the next few patches, remove the out of line
wait loop and move it into the main body of i915_wait_request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 68 ++++++++++++---------------------
 1 file changed, 24 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8d4f0c859fee..e4eeb5f5453c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -988,49 +988,6 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
-static long
-__i915_request_wait_for_execute(struct drm_i915_gem_request *request,
-				unsigned int flags,
-				long timeout)
-{
-	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
-		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
-	DEFINE_WAIT(reset);
-	DEFINE_WAIT(wait);
-
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(q, &reset);
-
-	do {
-		prepare_to_wait(&request->execute.wait, &wait, state);
-
-		if (i915_sw_fence_done(&request->execute))
-			break;
-
-		if (flags & I915_WAIT_LOCKED &&
-		    i915_reset_in_progress(&request->i915->gpu_error)) {
-			__set_current_state(TASK_RUNNING);
-			i915_reset(request->i915);
-			reset_wait_queue(q, &reset);
-			continue;
-		}
-
-		if (signal_pending_state(state, current)) {
-			timeout = -ERESTARTSYS;
-			break;
-		}
-
-		timeout = io_schedule_timeout(timeout);
-	} while (timeout);
-	finish_wait(&request->execute.wait, &wait);
-
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(q, &reset);
-
-	return timeout;
-}
-
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -1080,7 +1037,30 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		add_wait_queue(errq, &reset);
 
 	if (!i915_sw_fence_done(&req->execute)) {
-		timeout = __i915_request_wait_for_execute(req, flags, timeout);
+		DEFINE_WAIT(exec);
+
+		do {
+			prepare_to_wait(&req->execute.wait, &exec, state);
+			if (i915_sw_fence_done(&req->execute))
+				break;
+
+			if (flags & I915_WAIT_LOCKED &&
+			    i915_reset_in_progress(&req->i915->gpu_error)) {
+				__set_current_state(TASK_RUNNING);
+				i915_reset(req->i915);
+				reset_wait_queue(errq, &reset);
+				continue;
+			}
+
+			if (signal_pending_state(state, current)) {
+				timeout = -ERESTARTSYS;
+				break;
+			}
+
+			timeout = io_schedule_timeout(timeout);
+		} while (timeout);
+		finish_wait(&req->execute.wait, &exec);
+
 		if (timeout < 0)
 			goto complete;
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 05/14] drm/i915: Deconstruct execute fence
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

On reflection, we are only using the execute fence as a waitqueue on the
global_seqno and not using it for dependency tracking between fences
(unlike the submit and dma fences). By only treating it as a waitqueue,
we can then treat it similar to the other waitqueues durin submit,
making the code simpler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 47 +++++++--------------------------
 drivers/gpu/drm/i915/i915_gem_request.h | 10 +------
 2 files changed, 11 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e4eeb5f5453c..e385d0c3c890 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence)
 	 * caught trying to reuse dead objects.
 	 */
 	i915_sw_fence_fini(&req->submit);
-	i915_sw_fence_fini(&req->execute);
 
 	kmem_cache_free(req->i915->requests, req);
 }
@@ -211,7 +210,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 
 	GEM_BUG_ON(!request->i915->gt.active_requests);
@@ -422,7 +420,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	list_move_tail(&request->link, &timeline->requests);
 	spin_unlock(&request->timeline->lock);
 
-	i915_sw_fence_commit(&request->execute);
+	wake_up_all(&request->execute);
 }
 
 void i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -457,24 +455,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static int __i915_sw_fence_call
-execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
-	struct drm_i915_gem_request *request =
-		container_of(fence, typeof(*request), execute);
-
-	switch (state) {
-	case FENCE_COMPLETE:
-		break;
-
-	case FENCE_FREE:
-		i915_gem_request_put(request);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -567,13 +547,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
-	i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);
-
-	/* Ensure that the execute fence completes after the submit fence -
-	 * as we complete the execute fence from within the submit fence
-	 * callback, its completion would otherwise be visible first.
-	 */
-	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
+	init_waitqueue_head(&req->execute);
 
 	i915_priotree_init(&req->priotree);
 
@@ -1015,6 +989,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
 	DEFINE_WAIT(reset);
+	DEFINE_WAIT(exec);
 	struct intel_wait wait;
 
 	might_sleep();
@@ -1036,12 +1011,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	if (!i915_sw_fence_done(&req->execute)) {
-		DEFINE_WAIT(exec);
-
+	reset_wait_queue(&req->execute, &exec);
+	if (!req->global_seqno) {
 		do {
-			prepare_to_wait(&req->execute.wait, &exec, state);
-			if (i915_sw_fence_done(&req->execute))
+			set_current_state(state);
+			if (req->global_seqno)
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1059,15 +1033,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 			timeout = io_schedule_timeout(timeout);
 		} while (timeout);
-		finish_wait(&req->execute.wait, &exec);
+		finish_wait(&req->execute, &exec);
 
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
+		GEM_BUG_ON(!req->global_seqno);
 	}
-	GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
-	GEM_BUG_ON(!req->global_seqno);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 9049936c571c..467d3e13fce0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -119,18 +119,10 @@ struct drm_i915_gem_request {
 	 * The submit fence is used to await upon all of the request's
 	 * dependencies. When it is signaled, the request is ready to run.
 	 * It is used by the driver to then queue the request for execution.
-	 *
-	 * The execute fence is used to signal when the request has been
-	 * sent to hardware.
-	 *
-	 * It is illegal for the submit fence of one request to wait upon the
-	 * execute fence of an earlier request. It should be sufficient to
-	 * wait upon the submit fence of the earlier request.
 	 */
 	struct i915_sw_fence submit;
-	struct i915_sw_fence execute;
 	wait_queue_t submitq;
-	wait_queue_t execq;
+	wait_queue_head_t execute;
 
 	/* A list of everyone we wait upon, and everyone who waits upon us.
 	 * Even though we will not be submitted to the hardware before the
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (3 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 05/14] drm/i915: Deconstruct execute fence Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 07/14] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

A request is assigned a global seqno only when it is on the hardware
execution queue. The global seqno can be used to maintain a list of
requests on the same engine in retirement order, for example for
constructing a priority queue for waiting. Prior to its execution, or
if it is subsequently removed in the event of preemption, its global
seqno is zero. As both insertion and removal from the execution queue
may operate in IRQ context, it is not guarded by the usual struct_mutex
BKL. Instead those relying on the global seqno must be prepared for its
value to change between reads. Only when the request is complete can
the global seqno be stable (due to the memory barriers on submitting
the commands to the hardware to write the breadcrumb, if the HWS shows
that it has passed the global seqno and the global seqno is unchanged
after the read, it is indeed complete).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 16 ++++++--
 drivers/gpu/drm/i915/i915_gem.c          | 15 +++++---
 drivers/gpu/drm/i915/i915_gem_request.c  | 46 ++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_request.h  | 66 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
 5 files changed, 113 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f82c59768f65..748aadb36e62 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3975,14 +3975,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 }
 
 static inline bool
-__i915_request_irq_complete(struct drm_i915_gem_request *req)
+__i915_request_irq_complete(const struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
+	u32 seqno = i915_gem_request_global_seqno(req);
+
+	/* The request was dequeued before we were awoken. We check after
+	 * inspecting the hw to confirm that this was the same request
+	 * that generated the HWS update. The memory barriers within
+	 * the request execution are sufficient to ensure that a check
+	 * after reading the value from hw matches this request.
+	 */
+	if (!seqno)
+		return false;
 
 	/* Before we do the heavier coherent read of the seqno,
 	 * check the value (hopefully) in the CPU cacheline.
 	 */
-	if (__i915_gem_request_completed(req))
+	if (__i915_gem_request_completed(req, seqno))
 		return true;
 
 	/* Ensure our read of the seqno is coherent so that we
@@ -4033,7 +4043,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			wake_up_process(tsk);
 		rcu_read_unlock();
 
-		if (__i915_gem_request_completed(req))
+		if (__i915_gem_request_completed(req, seqno))
 			return true;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2749c64a35a3..0e34d11bc5a2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -399,7 +399,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 		i915_gem_request_retire_upto(rq);
 
-	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -2590,7 +2590,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *request;
+	struct drm_i915_gem_request *request, *active = NULL;
+	unsigned long flags;
 
 	/* We are called by the error capture and reset at a random
 	 * point in time. In particular, note that neither is crucially
@@ -2600,15 +2601,19 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * extra delay for a recent interrupt is pointless. Hence, we do
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
 	list_for_each_entry(request, &engine->timeline->requests, link) {
-		if (__i915_gem_request_completed(request))
+		if (__i915_gem_request_completed(request,
+						 request->global_seqno))
 			continue;
 
 		GEM_BUG_ON(request->engine != engine);
-		return request;
+		active = request;
+		break;
 	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
-	return NULL;
+	return active;
 }
 
 static bool engine_stalled(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e385d0c3c890..82606f8fd244 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -412,7 +412,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 		intel_engine_enable_signaling(request);
 	spin_unlock(&request->lock);
 
-	GEM_BUG_ON(!request->global_seqno);
 	engine->emit_breadcrumb(request,
 				request->ring->vaddr + request->postfix);
 
@@ -498,7 +497,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
-	if (req && __i915_gem_request_completed(req))
+	if (req && i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
 	/* Beware: Dragons be flying overhead.
@@ -604,6 +603,7 @@ static int
 i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			       struct drm_i915_gem_request *from)
 {
+	u32 seqno;
 	int ret;
 
 	GEM_BUG_ON(to == from);
@@ -626,14 +626,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 		return ret < 0 ? ret : 0;
 	}
 
-	if (!from->global_seqno) {
+	seqno = i915_gem_request_global_seqno(from);
+	if (!seqno) {
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
 						    GFP_KERNEL);
 		return ret < 0 ? ret : 0;
 	}
 
-	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
+	if (seqno <= to->timeline->sync_seqno[from->engine->id])
 		return 0;
 
 	trace_i915_gem_ring_sync_to(to, from);
@@ -651,7 +652,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}
 
-	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
+	to->timeline->sync_seqno[from->engine->id] = seqno;
 	return 0;
 }
 
@@ -931,7 +932,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *req,
-			 int state, unsigned long timeout_us)
+			 u32 seqno, int state, unsigned long timeout_us)
 {
 	unsigned int cpu;
 
@@ -947,7 +948,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 
 	timeout_us += local_clock_us(&cpu);
 	do {
-		if (__i915_gem_request_completed(req))
+		if (seqno != i915_gem_request_global_seqno(req))
+			break;
+
+		if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
+				      seqno))
 			return true;
 
 		if (signal_pending_state(state, current))
@@ -1011,11 +1016,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
+	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+
 	reset_wait_queue(&req->execute, &exec);
-	if (!req->global_seqno) {
+	if (!wait.seqno) {
 		do {
 			set_current_state(state);
-			if (req->global_seqno)
+
+			wait.seqno = i915_gem_request_global_seqno(req);
+			if (wait.seqno)
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1038,7 +1047,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!req->global_seqno);
+		GEM_BUG_ON(!wait.seqno);
 	}
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
@@ -1047,7 +1056,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
@@ -1068,7 +1076,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 		timeout = io_schedule_timeout(timeout);
 
-		if (intel_wait_complete(&wait))
+		if (intel_wait_complete(&wait) &&
+		    i915_gem_request_global_seqno(req) == wait.seqno)
 			break;
 
 		set_current_state(state);
@@ -1119,14 +1128,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 static void engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
+	u32 seqno = intel_engine_get_seqno(engine);
+	LIST_HEAD(retire);
 
+	spin_lock_irq(&engine->timeline->lock);
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
-		if (!__i915_gem_request_completed(request))
-			return;
+		if (!i915_seqno_passed(seqno, request->global_seqno))
+			break;
 
-		i915_gem_request_retire(request);
+		list_move_tail(&request->link, &retire);
 	}
+	spin_unlock_irq(&engine->timeline->lock);
+
+	list_for_each_entry_safe(request, next, &retire, link)
+		i915_gem_request_retire(request);
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 467d3e13fce0..b81f6709905c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -135,6 +135,11 @@ struct drm_i915_gem_request {
 	struct i915_priotree priotree;
 	struct i915_dependency dep;
 
+	/** GEM sequence number associated with this request on the
+	 * global execution timeline. It is zero when the request is not
+	 * on the HW queue (i.e. not on the engine timeline list).
+	 * Its value is guarded by the timeline spinlock.
+	 */
 	u32 global_seqno;
 
 	/** Position in the ring of the start of the request */
@@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+/**
+ * i915_gem_request_global_seqno - report the current global seqno
+ * @request - the request
+ *
+ * A request is assigned a global seqno only when it is on the hardware
+ * execution queue. The global seqno can be used to maintain a list of
+ * requests on the same engine in retirement order, for example for
+ * constructing a priority queue for waiting. Prior to its execution, or
+ * if it is subsequently removed in the event of preemption, its global
+ * seqno is zero. As both insertion and removal from the execution queue
+ * may operate in IRQ context, it is not guarded by the usual struct_mutex
+ * BKL. Instead those relying on the global seqno must be prepared for its
+ * value to change between reads. Only when the request is complete can
+ * the global seqno be stable (due to the memory barriers on submitting
+ * the commands to the hardware to write the breadcrumb, if the HWS shows
+ * that it has passed the global seqno and the global seqno is unchanged
+ * after the read, it is indeed complete).
+ */
+static u32
+i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
+{
+	return READ_ONCE(request->global_seqno);
+}
+
 int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
@@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 }
 
 static inline bool
-__i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
 {
-	GEM_BUG_ON(!req->global_seqno);
+	GEM_BUG_ON(!seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->global_seqno - 1);
+				 seqno - 1);
 }
 
 static inline bool
 i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
-	if (!req->global_seqno)
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(req);
+	if (!seqno)
 		return false;
 
-	return __i915_gem_request_started(req);
+	return __i915_gem_request_started(req, seqno);
 }
 
 static inline bool
-__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
 {
-	GEM_BUG_ON(!req->global_seqno);
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->global_seqno);
+	GEM_BUG_ON(!seqno);
+	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
+		seqno == i915_gem_request_global_seqno(req);
 }
 
 static inline bool
 i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
-	if (!req->global_seqno)
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(req);
+	if (!seqno)
 		return false;
 
-	return __i915_gem_request_completed(req);
+	return __i915_gem_request_completed(req, seqno);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
-			 int state, unsigned long timeout_us);
+			 u32 seqno, int state, unsigned long timeout_us);
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
 				     int state, unsigned long timeout_us)
 {
-	return (__i915_gem_request_started(request) &&
-		__i915_spin_request(request, state, timeout_us));
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(request);
+	return (__i915_gem_request_started(request, seqno) &&
+		__i915_spin_request(request, seqno, state, timeout_us));
 }
 
 /* We treat requests as fences. This is not be to confused with our
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 6fc3ec9f639d..8a56bb516aa0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -520,6 +520,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	struct rb_node *parent, **p;
 	bool first, wakeup;
+	u32 seqno;
 
 	/* Note that we may be called from an interrupt handler on another
 	 * device (e.g. nouveau signaling a fence completion causing us
@@ -530,11 +531,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 
 	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
 	assert_spin_locked(&request->lock);
-	if (!request->global_seqno)
+
+	seqno = i915_gem_request_global_seqno(request);
+	if (!seqno)
 		return;
 
 	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.seqno = request->global_seqno;
+	request->signaling.wait.seqno = seqno;
 	i915_gem_request_get(request);
 
 	spin_lock(&b->lock);
@@ -558,8 +561,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	p = &b->signals.rb_node;
 	while (*p) {
 		parent = *p;
-		if (i915_seqno_passed(request->global_seqno,
-				      to_signaler(parent)->global_seqno)) {
+		if (i915_seqno_passed(seqno,
+				      to_signaler(parent)->signaling.wait.seqno)) {
 			p = &parent->rb_right;
 			first = false;
 		} else {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 07/14] drm/i915: Take a reference whilst processing the signaler request
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (4 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 08/14] drm/i915: Allow an request to be cancelled Chris Wilson
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

The plan in the near-future is to allow requests to be removed from the
signaler. We can no longer then rely on holding a reference to the
request for the duration it is in the signaling tree, and instead must
obtain a reference to the request for the current operation using RCU.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 24 ++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 8a56bb516aa0..5bcd8436027c 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -474,7 +474,11 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * need to wait for a new interrupt from the GPU or for
 		 * a new client.
 		 */
-		request = READ_ONCE(b->first_signal);
+		rcu_read_lock();
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+		rcu_read_unlock();
 		if (signal_complete(request)) {
 			local_bh_disable();
 			dma_fence_signal(&request->fence);
@@ -493,21 +497,25 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 * the oldest before picking the next one.
 			 */
 			spin_lock_irq(&b->lock);
-			if (request == b->first_signal) {
+			if (request == rcu_access_pointer(b->first_signal)) {
 				struct rb_node *rb =
 					rb_next(&request->signaling.node);
-				b->first_signal = rb ? to_signaler(rb) : NULL;
+				rcu_assign_pointer(b->first_signal,
+						   rb ? to_signaler(rb) : NULL);
 			}
 			rb_erase(&request->signaling.node, &b->signals);
 			spin_unlock_irq(&b->lock);
 
 			i915_gem_request_put(request);
 		} else {
-			if (kthread_should_stop())
+			if (kthread_should_stop()) {
+				GEM_BUG_ON(request);
 				break;
+			}
 
 			schedule();
 		}
+		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -572,7 +580,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	rb_link_node(&request->signaling.node, parent, p);
 	rb_insert_color(&request->signaling.node, &b->signals);
 	if (first)
-		smp_store_mb(b->first_signal, request);
+		rcu_assign_pointer(b->first_signal, request);
 
 	spin_unlock(&b->lock);
 
@@ -646,7 +654,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	/* The engines should be idle and all requests accounted for! */
 	WARN_ON(READ_ONCE(b->first_wait));
 	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(READ_ONCE(b->first_signal));
+	WARN_ON(rcu_access_pointer(b->first_signal));
 	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
 
 	if (!IS_ERR_OR_NULL(b->signaler))
@@ -662,8 +670,8 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&b->lock);
 
-	if (b->first_wait) {
-		wake_up_process(b->first_wait->tsk);
+	if (rcu_access_pointer(b->first_signal)) {
+		wake_up_process(b->signaler);
 		busy |= intel_engine_flag(engine);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9e7fdbb92231..b87317c04e1b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -240,7 +240,7 @@ struct intel_engine_cs {
 		struct rb_root signals; /* sorted by retirement */
 		struct intel_wait *first_wait; /* oldest waiter by retirement */
 		struct task_struct *signaler; /* used for fence signalling */
-		struct drm_i915_gem_request *first_signal;
+		struct drm_i915_gem_request __rcu *first_signal;
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 08/14] drm/i915: Allow an request to be cancelled
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (5 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 07/14] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 09/14] drm/i915: Remove the preempted request from the execution queue Chris Wilson
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

If we preempt a request and remove it from the execution queue, we need
to undo its global seqno and restart any waiters.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 63 ++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
 2 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 5bcd8436027c..96f78ab02447 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -333,22 +333,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b,
 		return tsk->prio;
 }
 
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-			      struct intel_wait *wait)
+static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
+				       struct intel_wait *wait)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	/* Quick check to see if this waiter was already decoupled from
-	 * the tree by the bottom-half to avoid contention on the spinlock
-	 * by the herd.
-	 */
-	if (RB_EMPTY_NODE(&wait->node))
-		return;
-
-	spin_lock_irq(&b->lock);
+	assert_spin_locked(&b->lock);
 
 	if (RB_EMPTY_NODE(&wait->node))
-		goto out_unlock;
+		goto out;
 
 	if (b->first_wait == wait) {
 		const int priority = wakeup_priority(b, wait->tsk);
@@ -414,11 +407,27 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
 	rb_erase(&wait->node, &b->waiters);
 
-out_unlock:
+out:
 	GEM_BUG_ON(b->first_wait == wait);
 	GEM_BUG_ON(rb_first(&b->waiters) !=
 		   (b->first_wait ? &b->first_wait->node : NULL));
 	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
+}
+
+void intel_engine_remove_wait(struct intel_engine_cs *engine,
+			      struct intel_wait *wait)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	/* Quick check to see if this waiter was already decoupled from
+	 * the tree by the bottom-half to avoid contention on the spinlock
+	 * by the herd.
+	 */
+	if (RB_EMPTY_NODE(&wait->node))
+		return;
+
+	spin_lock_irq(&b->lock);
+	__intel_engine_remove_wait(engine, wait);
 	spin_unlock_irq(&b->lock);
 }
 
@@ -504,6 +513,7 @@ static int intel_breadcrumbs_signaler(void *arg)
 						   rb ? to_signaler(rb) : NULL);
 			}
 			rb_erase(&request->signaling.node, &b->signals);
+			RB_CLEAR_NODE(&request->signaling.node);
 			spin_unlock_irq(&b->lock);
 
 			i915_gem_request_put(request);
@@ -588,6 +598,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 		wake_up_process(b->signaler);
 }
 
+void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	assert_spin_locked(&request->lock);
+	GEM_BUG_ON(!request->signaling.wait.seqno);
+
+	spin_lock(&b->lock);
+
+	if (!RB_EMPTY_NODE(&request->signaling.node)) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb =
+				rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+		rb_erase(&request->signaling.node, &b->signals);
+		RB_CLEAR_NODE(&request->signaling.node);
+	}
+
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	spin_unlock(&b->lock);
+
+	request->signaling.wait.seqno = 0;
+	i915_gem_request_put(request);
+}
+
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b87317c04e1b..d55386d0b842 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -601,6 +601,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
 void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
+void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
 {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 09/14] drm/i915: Remove the preempted request from the execution queue
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (6 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 08/14] drm/i915: Allow an request to be cancelled Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 10/14] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

After the request is cancelled, we then need to remove it from the
global execution timeline and return it to the context timeline, the
inverse of submit_request().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c  | 58 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h  |  3 ++
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 19 +++++++++--
 3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 82606f8fd244..f70a2c3d26cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -435,6 +435,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_timeline *timeline;
+
+	assert_spin_locked(&engine->timeline->lock);
+
+	/* Only unwind in reverse order, required so that the per-context list
+	 * is kept in seqno/ring order.
+	 */
+	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+	engine->timeline->seqno--;
+
+	/* We may be recursing from the signal callback of another i915 fence */
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	request->global_seqno = 0;
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
+	spin_unlock(&request->lock);
+
+	/* Transfer back from the global per-engine timeline to per-context */
+	timeline = request->timeline;
+	GEM_BUG_ON(timeline == engine->timeline);
+
+	spin_lock(&timeline->lock);
+	list_move(&request->link, &timeline->requests);
+	spin_unlock(&timeline->lock);
+
+	/* We don't need to wake_up any waiters on request->execute, they
+	 * will get woken by any other event or us re-adding this request
+	 * to the engine timeline (__i915_gem_request_submit()). The waiters
+	 * should be quite adapt at finding that the request now has a new
+	 * global_seqno to the one they went to sleep on.
+	 */
+}
+
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+
+	__i915_gem_request_unsubmit(request);
+
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -1016,9 +1065,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+	wait.tsk = current;
 
+restart:
 	reset_wait_queue(&req->execute, &exec);
+	wait.seqno = i915_gem_request_global_seqno(req);
 	if (!wait.seqno) {
 		do {
 			set_current_state(state);
@@ -1112,6 +1163,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
+
+		if (i915_gem_request_global_seqno(req) != wait.seqno) {
+			intel_engine_remove_wait(req->engine, &wait);
+			goto restart;
+		}
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index b81f6709905c..5f73d8c0a38a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 void __i915_gem_request_submit(struct drm_i915_gem_request *request);
 void i915_gem_request_submit(struct drm_i915_gem_request *request);
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 96f78ab02447..843195f13302 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -431,7 +431,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->lock);
 }
 
-static bool signal_complete(struct drm_i915_gem_request *request)
+static bool signal_valid(const struct drm_i915_gem_request *request)
+{
+	u32 seqno = READ_ONCE(request->global_seqno);
+
+	return seqno == request->signaling.wait.seqno;
+}
+
+static bool signal_complete(const struct drm_i915_gem_request *request)
 {
 	if (!request)
 		return false;
@@ -440,7 +447,7 @@ static bool signal_complete(struct drm_i915_gem_request *request)
 	 * signalled that this wait is already completed.
 	 */
 	if (intel_wait_complete(&request->signaling.wait))
-		return true;
+		return signal_valid(request);
 
 	/* Carefully check if the request is complete, giving time for the
 	 * seqno to be visible or if the GPU hung.
@@ -518,12 +525,20 @@ static int intel_breadcrumbs_signaler(void *arg)
 
 			i915_gem_request_put(request);
 		} else {
+			DEFINE_WAIT(exec);
+
 			if (kthread_should_stop()) {
 				GEM_BUG_ON(request);
 				break;
 			}
 
+			if (request)
+				add_wait_queue(&request->execute, &exec);
+
 			schedule();
+
+			if (request)
+				remove_wait_queue(&request->execute, &exec);
 		}
 		i915_gem_request_put(request);
 	} while (1);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 10/14] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (7 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 09/14] drm/i915: Remove the preempted request from the execution queue Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 11/14] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

Add a mock selftest to preempt a request and check that we cancel it,
requeue the request and then complete its execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/i915_gem_request.c | 59 +++++++++++++++++++++++
 drivers/gpu/drm/i915/selftests/mock_request.c     | 19 ++++++++
 drivers/gpu/drm/i915/selftests/mock_request.h     |  2 +
 3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 92fa55bd68c8..4c565a7b9eff 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -26,6 +26,7 @@
 
 #include "../i915_selftest.h"
 
+#include "mock_context.h"
 #include "mock_gem_device.h"
 
 static int igt_add_request(void *arg)
@@ -144,12 +145,70 @@ static int igt_fence_wait(void *arg)
 	return err;
 }
 
+static int igt_request_rewind(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_request *request, *vip;
+	struct i915_gem_context *ctx[2];
+	int err = -EINVAL;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx[0] = mock_context(i915, "A");
+	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
+	if (!request) {
+		err = -ENOMEM;
+		goto err_device;
+	}
+	i915_add_request(request);
+
+	ctx[1] = mock_context(i915, "B");
+	vip = mock_request(i915->engine[RCS], ctx[1], 0);
+	if (!vip) {
+		err = -ENOMEM;
+		goto err_locked;
+	}
+
+	/* Simulate preemption by manual reordering */
+	if (!mock_cancel_request(request)) {
+		pr_err("failed to cancel request (already executed)!\n");
+		i915_add_request(vip);
+		goto err_locked;
+	}
+	i915_add_request(vip);
+	request->engine->submit_request(request);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (i915_wait_request(vip, 0, HZ) == -ETIME) {
+		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
+		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
+		goto err;
+	}
+
+	if (i915_gem_request_completed(request)) {
+		pr_err("low priority request already completed\n");
+		goto err;
+	}
+
+	err = 0;
+err:
+	mutex_lock(&i915->drm.struct_mutex);
+err_locked:
+	mock_context_close(ctx[1]);
+	mock_context_close(ctx[0]);
+err_device:
+	mock_device_flush(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int i915_gem_request_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_add_request),
 		SUBTEST(igt_wait_request),
 		SUBTEST(igt_fence_wait),
+		SUBTEST(igt_request_rewind),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index e23242d1b88a..0e8d2e7f8c70 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include "mock_engine.h"
 #include "mock_request.h"
 
 struct drm_i915_gem_request *
@@ -42,3 +43,21 @@ mock_request(struct intel_engine_cs *engine,
 
 	return &mock->base;
 }
+
+bool mock_cancel_request(struct drm_i915_gem_request *request)
+{
+	struct mock_request *mock = container_of(request, typeof(*mock), base);
+	struct mock_engine *engine =
+		container_of(request->engine, typeof(*engine), base);
+	bool was_queued;
+
+	spin_lock_irq(&engine->hw_lock);
+	was_queued = !list_empty(&mock->link);
+	list_del_init(&mock->link);
+	spin_unlock_irq(&engine->hw_lock);
+
+	if (was_queued)
+		i915_gem_request_unsubmit(request);
+
+	return was_queued;
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
index cc76d4f4eb4e..4dea74c8e96d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.h
+++ b/drivers/gpu/drm/i915/selftests/mock_request.h
@@ -41,4 +41,6 @@ mock_request(struct intel_engine_cs *engine,
 	     struct i915_gem_context *context,
 	     unsigned long delay);
 
+bool mock_cancel_request(struct drm_i915_gem_request *request);
+
 #endif /* !__MOCK_REQUEST__ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 11/14] drm/i915: Replace reset_wait_queue with default_wake_function
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (8 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 10/14] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 12/14] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

If we change the wait_queue_t from using the autoremove_wake_function to
the default_wake_function, we no longer have to restore the wait_queue_t
entry on the wait_queue_head_t list after being woken up by it, as we
are unusual in sleeping multiple times on the same wait_queue_t.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 28 ++++++----------------------
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f70a2c3d26cd..6cf1210984d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -938,16 +938,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
-static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
 static unsigned long local_clock_us(unsigned int *cpu)
 {
 	unsigned long t;
@@ -1042,8 +1032,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
-	DEFINE_WAIT(reset);
-	DEFINE_WAIT(exec);
+	DEFINE_WAIT_FUNC(reset, default_wake_function);
+	DEFINE_WAIT_FUNC(exec, default_wake_function);
 	struct intel_wait wait;
 
 	might_sleep();
@@ -1062,13 +1052,13 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req);
 
+	add_wait_queue(&req->execute, &exec);
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
 	wait.tsk = current;
 
 restart:
-	reset_wait_queue(&req->execute, &exec);
 	wait.seqno = i915_gem_request_global_seqno(req);
 	if (!wait.seqno) {
 		do {
@@ -1082,21 +1072,16 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 			    i915_reset_in_progress(&req->i915->gpu_error)) {
 				__set_current_state(TASK_RUNNING);
 				i915_reset(req->i915);
-				reset_wait_queue(errq, &reset);
 				continue;
 			}
 
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
-				break;
+				goto complete;
 			}
 
 			timeout = io_schedule_timeout(timeout);
 		} while (timeout);
-		finish_wait(&req->execute, &exec);
-
-		if (timeout < 0)
-			goto complete;
 
 		GEM_BUG_ON(!wait.seqno);
 	}
@@ -1156,7 +1141,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
 			i915_reset(req->i915);
-			reset_wait_queue(errq, &reset);
 			continue;
 		}
 
@@ -1171,11 +1155,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
-	__set_current_state(TASK_RUNNING);
-
 complete:
+	__set_current_state(TASK_RUNNING);
 	if (flags & I915_WAIT_LOCKED)
 		remove_wait_queue(errq, &reset);
+	remove_wait_queue(&req->execute, &exec);
 	trace_i915_gem_request_wait_end(req);
 
 	return timeout;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 12/14] drm/i915: Refactor direct GPU reset from request waiters
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (9 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 11/14] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 13/14] drm/i915: Immediately process a reset before starting waiting Chris Wilson
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

Combine the common code for the pair of waiters into a single function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6cf1210984d7..71ae3dc443d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1006,6 +1006,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
+static bool __i915_reset_request(struct drm_i915_gem_request *request)
+{
+	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
+		return false;
+
+	__set_current_state(TASK_RUNNING);
+	i915_reset(request->i915);
+	return true;
+}
+
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -1069,11 +1079,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
-			    i915_reset_in_progress(&req->i915->gpu_error)) {
-				__set_current_state(TASK_RUNNING);
-				i915_reset(req->i915);
+			    __i915_reset_request(req))
 				continue;
-			}
 
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
@@ -1137,12 +1144,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		 * to come along and update the breadcrumb (either directly
 		 * itself, or indirectly by recovering the GPU).
 		 */
-		if (flags & I915_WAIT_LOCKED &&
-		    i915_reset_in_progress(&req->i915->gpu_error)) {
-			__set_current_state(TASK_RUNNING);
-			i915_reset(req->i915);
+		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
 			continue;
-		}
 
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 13/14] drm/i915: Immediately process a reset before starting waiting
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (10 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 12/14] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-02 15:13 ` [PATCH 14/14] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
  2017-02-14 11:13 ` [PATCH 01/14] drm/i915: Keep a global seqno per-engine Joonas Lahtinen
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

As we handoff the GPU reset to the waiter, we need to check we don't
miss a wakeup if it has already been sent prior to us starting the wait.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 71ae3dc443d8..bfdb1013da7d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1063,8 +1063,10 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	trace_i915_gem_request_wait_begin(req);
 
 	add_wait_queue(&req->execute, &exec);
-	if (flags & I915_WAIT_LOCKED)
+	if (flags & I915_WAIT_LOCKED) {
 		add_wait_queue(errq, &reset);
+		__i915_reset_request(req);
+	}
 
 	wait.tsk = current;
 
@@ -1099,7 +1101,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	if (intel_engine_add_wait(req->engine, &wait))
+	if (intel_engine_add_wait(req->engine, &wait) ||
+	    flags & I915_WAIT_LOCKED)
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
 		 * coherent check on the seqno before we sleep.
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 14/14] drm/i915: Remove one level of indention from wait-for-execute
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (11 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 13/14] drm/i915: Immediately process a reset before starting waiting Chris Wilson
@ 2017-02-02 15:13 ` Chris Wilson
  2017-02-14 11:13 ` [PATCH 01/14] drm/i915: Keep a global seqno per-engine Joonas Lahtinen
  13 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-02 15:13 UTC (permalink / raw)
  To: intel-gfx

Now that the code is getting simpler, we can reduce the indentation when
waiting for the global_seqno.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bfdb1013da7d..e75edca2e6c3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1071,29 +1071,24 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	wait.tsk = current;
 
 restart:
-	wait.seqno = i915_gem_request_global_seqno(req);
-	if (!wait.seqno) {
-		do {
-			set_current_state(state);
-
-			wait.seqno = i915_gem_request_global_seqno(req);
-			if (wait.seqno)
-				break;
+	do {
+		set_current_state(state);
+		wait.seqno = i915_gem_request_global_seqno(req);
+		if (wait.seqno)
+			break;
 
-			if (flags & I915_WAIT_LOCKED &&
-			    __i915_reset_request(req))
-				continue;
+		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
+			continue;
 
-			if (signal_pending_state(state, current)) {
-				timeout = -ERESTARTSYS;
-				goto complete;
-			}
+		if (signal_pending_state(state, current)) {
+			timeout = -ERESTARTSYS;
+			goto complete;
+		}
 
-			timeout = io_schedule_timeout(timeout);
-		} while (timeout);
+		timeout = io_schedule_timeout(timeout);
+	} while (timeout);
 
-		GEM_BUG_ON(!wait.seqno);
-	}
+	GEM_BUG_ON(!wait.seqno);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
 	/* Optimistic short spin before touching IRQs */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (12 preceding siblings ...)
  2017-02-02 15:13 ` [PATCH 14/14] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
@ 2017-02-14 11:13 ` Joonas Lahtinen
  2017-02-14 11:57   ` Chris Wilson
  13 siblings, 1 reply; 20+ messages in thread
From: Joonas Lahtinen @ 2017-02-14 11:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On to, 2017-02-02 at 15:12 +0000, Chris Wilson wrote:
> Replace the global device seqno with one for each engine, and account
> for in-flight seqno on each separately. This is consistent with
> dma-fence as each timeline has separate fence-contexts for each engine
> and a seqno is only ordered within a fence-context (i.e.  seqno do not
> need to be ordered wrt to other engines, just ordered within a single
> engine). This is required to enable request rewinding for preemption on
> individual engines (we have to rewind the global seqno to avoid
> overflow, and we do not have to rewind all engines just to preempt one.)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -213,7 +213,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
>  	GEM_BUG_ON(!i915_gem_request_completed(request));
> +
>  	GEM_BUG_ON(!request->i915->gt.active_requests);
> +	GEM_BUG_ON(!request->engine->timeline->active_seqno);

active_seqnos to indicate it's a count.
 
> -static int reserve_global_seqno(struct drm_i915_private *i915)
> +static int reserve_global_seqno(struct intel_engine_cs *engine)
>  {
> -	u32 active_requests = ++i915->gt.active_requests;
> -	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> +	u32 active = ++engine->timeline->active_seqno;
> +	u32 seqno = engine->timeline->seqno;
>  	int ret;
>  
>  	/* Reservation is fine until we need to wrap around */
> -	if (likely(seqno + active_requests > seqno))
> +	if (likely(seqno + active > seqno))
>  		return 0;
> 

Make a comment here that we don't currently track the semaphores
waiting on specific engine, so we instead choose to wrap all of them.
 
> -	ret = i915_gem_init_global_seqno(i915, 0);
> +	ret = i915_gem_init_global_seqno(engine->i915, 0);

Also, we don't have a global seqno anymore, so the function name needs
updating. Maybe "init_{all_}global_seqnos" to make it clear.

>  	if (ret) {
> -		i915->gt.active_requests--;
> +		engine->timeline->active_seqno--;
>  		return ret;
>  	}
>  
>  	return 0;
>  }

<SNIP>

> @@ -889,16 +887,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	list_add_tail(&request->link, &timeline->requests);
>  	spin_unlock_irq(&timeline->lock);
>  
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> -				     request->fence.seqno));
> -
> -	timeline->last_submitted_seqno = request->fence.seqno;
> +	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
>  	i915_gem_active_set(&timeline->last_request, request);
>  
>  	list_add_tail(&request->ring_link, &ring->request_list);
>  	request->emitted_jiffies = jiffies;
>  
> -	i915_gem_mark_busy(engine);
> +	if (!request->i915->gt.active_requests++)
> +		i915_gem_mark_busy(engine);

This kinda hides the increment, I'd prefer to have it better in sight.

With the comment added and function renamed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
  2017-02-02 15:13 ` [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
@ 2017-02-14 11:17   ` Joonas Lahtinen
  0 siblings, 0 replies; 20+ messages in thread
From: Joonas Lahtinen @ 2017-02-14 11:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On to, 2017-02-02 at 15:13 +0000, Chris Wilson wrote:
> Use a local variable to avoid having to type out the full name of the
> gpu_error wait_queue.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I'm surprised, usually it's the opposite; converting to
something->anything->i915...

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
  2017-02-02 15:13 ` [PATCH 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
@ 2017-02-14 11:25   ` Joonas Lahtinen
  0 siblings, 0 replies; 20+ messages in thread
From: Joonas Lahtinen @ 2017-02-14 11:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On to, 2017-02-02 at 15:13 +0000, Chris Wilson wrote:
> Add ourselves to the gpu error waitqueue earllier on, even before we

s/earllier/earlier/

> determine we have to wait on the seqno. This is so that we can then
> share the waitqueue between stages in subsequent patches.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute()
  2017-02-02 15:13 ` [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
@ 2017-02-14 11:32   ` Joonas Lahtinen
  2017-02-14 11:55     ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Joonas Lahtinen @ 2017-02-14 11:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On to, 2017-02-02 at 15:13 +0000, Chris Wilson wrote:
> It had only one callsite and existed to keep the code clearer. Now
> having shared the wait-on-error between phases and with plans to change
> the wait-for-execute in the next few patches, remove the out of line
> wait loop and move it into the main body of i915_wait_request.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -1080,7 +1037,30 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		add_wait_queue(errq, &reset);
>  
>  	if (!i915_sw_fence_done(&req->execute)) {
> -		timeout = __i915_request_wait_for_execute(req, flags, timeout);
> +		DEFINE_WAIT(exec);
> +
> +		do {
> +			prepare_to_wait(&req->execute.wait, &exec, state);
> +			if (i915_sw_fence_done(&req->execute))
> +				break;
> +
> +			if (flags & I915_WAIT_LOCKED &&
> +			    i915_reset_in_progress(&req->i915->gpu_error)) {
> +				__set_current_state(TASK_RUNNING);
> +				i915_reset(req->i915);

I'm no the expert here, but this reads funnily; "if reset in progress,
do reset".

This was pre-existing code, so;

Reviewed-by: Joonas Lahtinen <joonas.lahtine@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute()
  2017-02-14 11:32   ` Joonas Lahtinen
@ 2017-02-14 11:55     ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-14 11:55 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Tue, Feb 14, 2017 at 01:32:37PM +0200, Joonas Lahtinen wrote:
> On to, 2017-02-02 at 15:13 +0000, Chris Wilson wrote:
> > It had only one callsite and existed to keep the code clearer. Now
> > having shared the wait-on-error between phases and with plans to change
> > the wait-for-execute in the next few patches, remove the out of line
> > wait loop and move it into the main body of i915_wait_request.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> <SNIP>
> 
> > @@ -1080,7 +1037,30 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> >  		add_wait_queue(errq, &reset);
> >  
> >  	if (!i915_sw_fence_done(&req->execute)) {
> > -		timeout = __i915_request_wait_for_execute(req, flags, timeout);
> > +		DEFINE_WAIT(exec);
> > +
> > +		do {
> > +			prepare_to_wait(&req->execute.wait, &exec, state);
> > +			if (i915_sw_fence_done(&req->execute))
> > +				break;
> > +
> > +			if (flags & I915_WAIT_LOCKED &&
> > +			    i915_reset_in_progress(&req->i915->gpu_error)) {
> > +				__set_current_state(TASK_RUNNING);
> > +				i915_reset(req->i915);
> 
> I'm no the expert here, but this reads funnily; "if reset in progress,
> do reset".

In a series-to-come, this is renamed to

	if (i915_reset_handoff())
		i915_reset();

-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 01/14] drm/i915: Keep a global seqno per-engine
  2017-02-14 11:13 ` [PATCH 01/14] drm/i915: Keep a global seqno per-engine Joonas Lahtinen
@ 2017-02-14 11:57   ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-02-14 11:57 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Tue, Feb 14, 2017 at 01:13:58PM +0200, Joonas Lahtinen wrote:
> On to, 2017-02-02 at 15:12 +0000, Chris Wilson wrote:
> > -static int reserve_global_seqno(struct drm_i915_private *i915)
> > +static int reserve_global_seqno(struct intel_engine_cs *engine)
> >  {
> > -	u32 active_requests = ++i915->gt.active_requests;
> > -	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> > +	u32 active = ++engine->timeline->active_seqno;
> > +	u32 seqno = engine->timeline->seqno;
> >  	int ret;
> >  
> >  	/* Reservation is fine until we need to wrap around */
> > -	if (likely(seqno + active_requests > seqno))
> > +	if (likely(seqno + active > seqno))
> >  		return 0;
> > 
> 
> Make a comment here that we don't currently track the semaphores
> waiting on specific engine, so we instead choose to wrap all of them.
>  
> > -	ret = i915_gem_init_global_seqno(i915, 0);
> > +	ret = i915_gem_init_global_seqno(engine->i915, 0);
> 
> Also, we don't have a global seqno anymore, so the function name needs
> updating. Maybe "init_{all_}global_seqnos" to make it clear.

However, i915_gem_init_global_seqno() is acting globally. It's debug code
but I think the implication is important to keep. reserve_global_seqno()
is now local and that feels appropriate to rename.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-02-14 11:58 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-02 15:12 [PATCH 01/14] drm/i915: Keep a global seqno per-engine Chris Wilson
2017-02-02 15:13 ` [PATCH 02/14] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
2017-02-14 11:17   ` Joonas Lahtinen
2017-02-02 15:13 ` [PATCH 03/14] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
2017-02-14 11:25   ` Joonas Lahtinen
2017-02-02 15:13 ` [PATCH 04/14] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
2017-02-14 11:32   ` Joonas Lahtinen
2017-02-14 11:55     ` Chris Wilson
2017-02-02 15:13 ` [PATCH 05/14] drm/i915: Deconstruct execute fence Chris Wilson
2017-02-02 15:13 ` [PATCH 06/14] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
2017-02-02 15:13 ` [PATCH 07/14] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
2017-02-02 15:13 ` [PATCH 08/14] drm/i915: Allow an request to be cancelled Chris Wilson
2017-02-02 15:13 ` [PATCH 09/14] drm/i915: Remove the preempted request from the execution queue Chris Wilson
2017-02-02 15:13 ` [PATCH 10/14] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
2017-02-02 15:13 ` [PATCH 11/14] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
2017-02-02 15:13 ` [PATCH 12/14] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
2017-02-02 15:13 ` [PATCH 13/14] drm/i915: Immediately process a reset before starting waiting Chris Wilson
2017-02-02 15:13 ` [PATCH 14/14] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
2017-02-14 11:13 ` [PATCH 01/14] drm/i915: Keep a global seqno per-engine Joonas Lahtinen
2017-02-14 11:57   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.