All of lore.kernel.org
 help / color / mirror / Atom feed
* Pre-emption pre-enablement
@ 2017-02-22 11:45 Chris Wilson
  2017-02-22 11:45 ` [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine Chris Wilson
                   ` (16 more replies)
  0 siblings, 17 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:45 UTC (permalink / raw)
  To: intel-gfx

Same seqno series as before addressing comments last time and adding r-b
where given. Functionally the same as before, but I would like to land
this soonish as it eliminates one debugging BUG_ON that CI keeps
hitting, though I am pretty sure that is just a data race...
-Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
@ 2017-02-22 11:45 ` Chris Wilson
  2017-02-22 12:24   ` Tvrtko Ursulin
  2017-02-22 11:45 ` [PATCH v2 02/15] drm/i915: Move reserve_seqno() next to unreserve_seqno() Chris Wilson
                   ` (15 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:45 UTC (permalink / raw)
  To: intel-gfx

Replace the global device seqno with one for each engine, and account
for in-flight seqno on each separately. This is consistent with
dma-fence as each timeline has separate fence-contexts for each engine
and a seqno is only ordered within a fence-context (i.e. seqnos do not
need to be ordered wrt other engines, just ordered within a single
engine). This is required to enable request rewinding for preemption on
individual engines (we have to rewind the global seqno to avoid
overflow, and we do not have to rewind all engines just to preempt one.)

v2: Rename active_seqno to inflight_seqnos to more clearly indicate that
it is a counter and not equivalent to the existing seqno. Update
functions that operated on active_seqno similarly.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      | 11 +----
 drivers/gpu/drm/i915/i915_gem_request.c  | 83 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_request.h  |  8 +--
 drivers/gpu/drm/i915/i915_gem_timeline.h |  9 +++-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 ++++++-------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
 7 files changed, 70 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 655e60d609c2..1a28b5279bec 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1080,15 +1080,6 @@ static const struct file_operations i915_error_state_fops = {
 #endif
 
 static int
-i915_next_seqno_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
-	return 0;
-}
-
-static int
 i915_next_seqno_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
@@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val)
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
-			i915_next_seqno_get, i915_next_seqno_set,
+			NULL, i915_next_seqno_set,
 			"0x%llx\n");
 
 static int i915_frequency_info(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 51bc24d1d7c9..e8b354cf2f06 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt)
 	pt->priority = INT_MIN;
 }
 
+static void unreserve_seqno(struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
+	engine->timeline->inflight_seqnos--;
+}
+
 void i915_gem_retire_noop(struct i915_gem_active *active,
 			  struct drm_i915_gem_request *request)
 {
@@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 				 &request->i915->gt.idle_work,
 				 msecs_to_jiffies(100));
 	}
+	unreserve_seqno(request->engine);
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
+static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
 	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
 	struct intel_engine_cs *engine;
@@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 	GEM_BUG_ON(i915->gt.active_requests > 1);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
-		while (intel_breadcrumbs_busy(i915))
-			cond_resched(); /* spin until threads are complete */
-	}
-	atomic_set(&timeline->seqno, seqno);
+	for_each_engine(engine, i915, id) {
+		struct intel_timeline *tl = &timeline->engine[id];
 
-	/* Finally reset hw state */
-	for_each_engine(engine, i915, id)
+		if (!i915_seqno_passed(seqno, tl->seqno)) {
+			/* spin until threads are complete */
+			while (intel_breadcrumbs_busy(engine))
+				cond_resched();
+		}
+
+		/* Finally reset hw state */
+		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
+	}
 
 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
 		for_each_engine(engine, i915, id) {
@@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	/* HWS page needs to be set less than what we
 	 * will inject to ring
 	 */
-	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
+	return reset_all_global_seqno(dev_priv, seqno - 1);
 }
 
-static int reserve_global_seqno(struct drm_i915_private *i915)
+static int reserve_seqno(struct intel_engine_cs *engine)
 {
-	u32 active_requests = ++i915->gt.active_requests;
-	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
+	u32 active = ++engine->timeline->inflight_seqnos;
+	u32 seqno = engine->timeline->seqno;
 	int ret;
 
 	/* Reservation is fine until we need to wrap around */
-	if (likely(seqno + active_requests > seqno))
+	if (likely(!add_overflows(seqno, active)))
 		return 0;
 
-	ret = i915_gem_init_global_seqno(i915, 0);
+	/* Even though we are tracking inflight seqno individually on each
+	 * engine, other engines may be observing us using hw semaphores and
+	 * so we need to idle all engines before wrapping around this engine.
+	 * As all engines are then idle, we can reset the seqno on all, so
+	 * we don't stall in quick succession if each engine is being
+	 * similarly utilized.
+	 */
+	ret = reset_all_global_seqno(engine->i915, 0);
 	if (ret) {
-		i915->gt.active_requests--;
+		engine->timeline->inflight_seqnos--;
 		return ret;
 	}
 
 	return 0;
 }
 
-static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
-{
-	/* seqno only incremented under a mutex */
-	return ++tl->seqno.counter;
-}
-
-static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
+static u32 timeline_get_seqno(struct intel_timeline *tl)
 {
-	return atomic_inc_return(&tl->seqno);
+	return ++tl->seqno;
 }
 
 void __i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(timeline == request->timeline);
 	assert_spin_locked(&timeline->lock);
 
-	seqno = timeline_get_seqno(timeline->common);
+	seqno = timeline_get_seqno(timeline);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
-	request->previous_seqno = timeline->last_submitted_seqno;
-	timeline->last_submitted_seqno = seqno;
-
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
@@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = reserve_global_seqno(dev_priv);
+	ret = reserve_seqno(engine);
 	if (ret)
 		goto err_unpin;
 
@@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       &i915_fence_ops,
 		       &req->lock,
 		       req->timeline->fence_context,
-		       __timeline_get_seqno(req->timeline->common));
+		       timeline_get_seqno(req->timeline));
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
@@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	/* Check that we didn't interrupt ourselves with a new request */
+	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
 	return req;
 
 err_ctx:
@@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	kmem_cache_free(dev_priv->requests, req);
 err_unreserve:
-	dev_priv->gt.active_requests--;
+	unreserve_seqno(engine);
 err_unpin:
 	engine->context_unpin(engine, ctx);
 	return ERR_PTR(ret);
@@ -837,8 +847,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * our i915_gem_request_alloc() and called __i915_add_request() before
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
+	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
@@ -892,16 +901,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
-
-	timeline->last_submitted_seqno = request->fence.seqno;
+	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	i915_gem_mark_busy(engine);
+	if (!request->i915->gt.active_requests++)
+		i915_gem_mark_busy(engine);
 
 	/* Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea511f06efaf..9049936c571c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -145,12 +145,6 @@ struct drm_i915_gem_request {
 
 	u32 global_seqno;
 
-	/** GEM sequence number associated with the previous request,
-	 * when the HWS breadcrumb is equal to this the GPU is processing
-	 * this request.
-	 */
-	u32 previous_seqno;
-
 	/** Position in the ring of the start of the request */
 	u32 head;
 
@@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
 	GEM_BUG_ON(!req->global_seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->previous_seqno);
+				 req->global_seqno - 1);
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index f2e51f42cc2f..6c53e14cab2a 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -33,7 +33,13 @@ struct i915_gem_timeline;
 
 struct intel_timeline {
 	u64 fence_context;
-	u32 last_submitted_seqno;
+	u32 seqno;
+
+	/**
+	 * Count of outstanding requests, from the time they are constructed
+	 * to the moment they are retired. Loosely coupled to hardware.
+	 */
+	u32 inflight_seqnos;
 
 	spinlock_t lock;
 
@@ -56,7 +62,6 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
 	struct list_head link;
-	atomic_t seqno;
 
 	struct drm_i915_private *i915;
 	const char *name;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 5932e2dc0c6f..4f4e703d1b14 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	cancel_fake_irq(engine);
 }
 
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	unsigned int mask = 0;
-
-	for_each_engine(engine, i915, id) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		spin_lock_irq(&b->lock);
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	bool busy = false;
 
-		if (b->first_wait) {
-			wake_up_process(b->first_wait->tsk);
-			mask |= intel_engine_flag(engine);
-		}
+	spin_lock_irq(&b->lock);
 
-		if (b->first_signal) {
-			wake_up_process(b->signaler);
-			mask |= intel_engine_flag(engine);
-		}
+	if (b->first_wait) {
+		wake_up_process(b->first_wait->tsk);
+		busy |= intel_engine_flag(engine);
+	}
 
-		spin_unlock_irq(&b->lock);
+	if (b->first_signal) {
+		wake_up_process(b->signaler);
+		busy |= intel_engine_flag(engine);
 	}
 
-	return mask;
+	spin_unlock_irq(&b->lock);
+
+	return busy;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 46c94740be53..c4d4698f98e7 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		engine->irq_seqno_barrier(engine);
 
 	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-	engine->timeline->last_submitted_seqno = seqno;
-
 	engine->hangcheck.seqno = seqno;
 
 	/* After manually advancing the seqno, fake the interrupt in case
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4091c97be6ec..b91c2c7ef5a6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
 	 * wtih serialising this hint with anything, so document it as
 	 * a hint and nothing more.
 	 */
-	return READ_ONCE(engine->timeline->last_submitted_seqno);
+	return READ_ONCE(engine->timeline->seqno);
 }
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
@@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
 
 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 02/15] drm/i915: Move reserve_seqno() next to unreserve_seqno()
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
  2017-02-22 11:45 ` [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine Chris Wilson
@ 2017-02-22 11:45 ` Chris Wilson
  2017-02-22 12:23   ` Joonas Lahtinen
  2017-02-22 11:45 ` [PATCH v2 03/15] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
                   ` (14 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:45 UTC (permalink / raw)
  To: intel-gfx

Move the companion functions next to each other.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 161 +++++++++++++++-----------------
 1 file changed, 77 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e8b354cf2f06..31c454089c87 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -198,6 +198,83 @@ i915_priotree_init(struct i915_priotree *pt)
 	pt->priority = INT_MIN;
 }
 
+static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
+{
+	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int ret;
+
+	/* Carefully retire all requests without writing to the rings */
+	ret = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests > 1);
+
+	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
+	for_each_engine(engine, i915, id) {
+		struct intel_timeline *tl = &timeline->engine[id];
+
+		if (!i915_seqno_passed(seqno, tl->seqno)) {
+			/* spin until threads are complete */
+			while (intel_breadcrumbs_busy(engine))
+				cond_resched();
+		}
+
+		/* Finally reset hw state */
+		tl->seqno = seqno;
+		intel_engine_init_global_seqno(engine, seqno);
+	}
+
+	list_for_each_entry(timeline, &i915->gt.timelines, link) {
+		for_each_engine(engine, i915, id) {
+			struct intel_timeline *tl = &timeline->engine[id];
+
+			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
+		}
+	}
+
+	return 0;
+}
+
+int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (seqno == 0)
+		return -EINVAL;
+
+	/* HWS page needs to be set less than what we
+	 * will inject to ring
+	 */
+	return reset_all_global_seqno(dev_priv, seqno - 1);
+}
+
+static int reserve_seqno(struct intel_engine_cs *engine)
+{
+	u32 active = ++engine->timeline->inflight_seqnos;
+	u32 seqno = engine->timeline->seqno;
+	int ret;
+
+	/* Reservation is fine until we need to wrap around */
+	if (likely(!add_overflows(seqno, active)))
+		return 0;
+
+	ret = reset_all_global_seqno(engine->i915, 0);
+	if (ret) {
+		engine->timeline->inflight_seqnos--;
+		return ret;
+	}
+
+	return 0;
+}
+
 static void unreserve_seqno(struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
@@ -314,90 +391,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
-{
-	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int ret;
-
-	/* Carefully retire all requests without writing to the rings */
-	ret = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED);
-	if (ret)
-		return ret;
-
-	i915_gem_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests > 1);
-
-	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	for_each_engine(engine, i915, id) {
-		struct intel_timeline *tl = &timeline->engine[id];
-
-		if (!i915_seqno_passed(seqno, tl->seqno)) {
-			/* spin until threads are complete */
-			while (intel_breadcrumbs_busy(engine))
-				cond_resched();
-		}
-
-		/* Finally reset hw state */
-		tl->seqno = seqno;
-		intel_engine_init_global_seqno(engine, seqno);
-	}
-
-	list_for_each_entry(timeline, &i915->gt.timelines, link) {
-		for_each_engine(engine, i915, id) {
-			struct intel_timeline *tl = &timeline->engine[id];
-
-			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
-		}
-	}
-
-	return 0;
-}
-
-int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	if (seqno == 0)
-		return -EINVAL;
-
-	/* HWS page needs to be set less than what we
-	 * will inject to ring
-	 */
-	return reset_all_global_seqno(dev_priv, seqno - 1);
-}
-
-static int reserve_seqno(struct intel_engine_cs *engine)
-{
-	u32 active = ++engine->timeline->inflight_seqnos;
-	u32 seqno = engine->timeline->seqno;
-	int ret;
-
-	/* Reservation is fine until we need to wrap around */
-	if (likely(!add_overflows(seqno, active)))
-		return 0;
-
-	/* Even though we are tracking inflight seqno individually on each
-	 * engine, other engines may be observing us using hw semaphores and
-	 * so we need to idle all engines before wrapping around this engine.
-	 * As all engines are then idle, we can reset the seqno on all, so
-	 * we don't stall in quick succession if each engine is being
-	 * similarly utilized.
-	 */
-	ret = reset_all_global_seqno(engine->i915, 0);
-	if (ret) {
-		engine->timeline->inflight_seqnos--;
-		return ret;
-	}
-
-	return 0;
-}
-
 static u32 timeline_get_seqno(struct intel_timeline *tl)
 {
 	return ++tl->seqno;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 03/15] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
  2017-02-22 11:45 ` [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine Chris Wilson
  2017-02-22 11:45 ` [PATCH v2 02/15] drm/i915: Move reserve_seqno() next to unreserve_seqno() Chris Wilson
@ 2017-02-22 11:45 ` Chris Wilson
  2017-02-22 11:45 ` [PATCH v2 04/15] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:45 UTC (permalink / raw)
  To: intel-gfx

Use a local variable to avoid having to type out the full name of the
gpu_error wait_queue.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 31c454089c87..c1a1da3ee0d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1078,6 +1078,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 
@@ -1113,7 +1114,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	set_current_state(state);
 	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+		add_wait_queue(errq, &reset);
 
 	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -1164,8 +1165,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
 			i915_reset(req->i915);
-			reset_wait_queue(&req->i915->gpu_error.wait_queue,
-					 &reset);
+			reset_wait_queue(errq, &reset);
 			continue;
 		}
 
@@ -1176,7 +1176,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	intel_engine_remove_wait(req->engine, &wait);
 	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+		remove_wait_queue(errq, &reset);
 	__set_current_state(TASK_RUNNING);
 
 complete:
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 04/15] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-22 11:45 ` [PATCH v2 03/15] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
@ 2017-02-22 11:45 ` Chris Wilson
  2017-02-22 11:46 ` [PATCH v2 05/15] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:45 UTC (permalink / raw)
  To: intel-gfx

Add ourselves to the gpu error waitqueue earlier on, even before we
determine we have to wait on the seqno. This is so that we can then
share the waitqueue between stages in subsequent patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index c1a1da3ee0d7..ed1d42e7a206 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1098,6 +1098,9 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req, flags);
 
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(errq, &reset);
+
 	if (!i915_sw_fence_done(&req->execute)) {
 		timeout = __i915_request_wait_for_execute(req, flags, timeout);
 		if (timeout < 0)
@@ -1113,9 +1116,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(errq, &reset);
-
 	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
@@ -1175,11 +1175,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(errq, &reset);
 	__set_current_state(TASK_RUNNING);
 
 complete:
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(errq, &reset);
 	trace_i915_gem_request_wait_end(req);
 
 	return timeout;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 05/15] drm/i915: Inline __i915_gem_request_wait_for_execute()
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (3 preceding siblings ...)
  2017-02-22 11:45 ` [PATCH v2 04/15] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 11:46 ` [PATCH v2 06/15] drm/i915: Deconstruct execute fence Chris Wilson
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

It had only one callsite and existed to keep the code clearer. Now
having shared the wait-on-error between phases and with plans to change
the wait-for-execute in the next few patches, remove the out of line
wait loop and move it into the main body of i915_wait_request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 78 ++++++++++++---------------------
 1 file changed, 29 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ed1d42e7a206..88139dbf8fcc 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1005,54 +1005,6 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
-static long
-__i915_request_wait_for_execute(struct drm_i915_gem_request *request,
-				unsigned int flags,
-				long timeout)
-{
-	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
-		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
-	DEFINE_WAIT(reset);
-	DEFINE_WAIT(wait);
-
-	if (flags & I915_WAIT_LOCKED)
-		add_wait_queue(q, &reset);
-
-	do {
-		prepare_to_wait(&request->execute.wait, &wait, state);
-
-		if (i915_sw_fence_done(&request->execute))
-			break;
-
-		if (flags & I915_WAIT_LOCKED &&
-		    i915_reset_in_progress(&request->i915->gpu_error)) {
-			__set_current_state(TASK_RUNNING);
-			i915_reset(request->i915);
-			reset_wait_queue(q, &reset);
-			continue;
-		}
-
-		if (signal_pending_state(state, current)) {
-			timeout = -ERESTARTSYS;
-			break;
-		}
-
-		if (!timeout) {
-			timeout = -ETIME;
-			break;
-		}
-
-		timeout = io_schedule_timeout(timeout);
-	} while (1);
-	finish_wait(&request->execute.wait, &wait);
-
-	if (flags & I915_WAIT_LOCKED)
-		remove_wait_queue(q, &reset);
-
-	return timeout;
-}
-
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -1102,7 +1054,35 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		add_wait_queue(errq, &reset);
 
 	if (!i915_sw_fence_done(&req->execute)) {
-		timeout = __i915_request_wait_for_execute(req, flags, timeout);
+		DEFINE_WAIT(exec);
+
+		do {
+			prepare_to_wait(&req->execute.wait, &exec, state);
+			if (i915_sw_fence_done(&req->execute))
+				break;
+
+			if (flags & I915_WAIT_LOCKED &&
+			    i915_reset_in_progress(&req->i915->gpu_error)) {
+				__set_current_state(TASK_RUNNING);
+				i915_reset(req->i915);
+				reset_wait_queue(errq, &reset);
+				continue;
+			}
+
+			if (signal_pending_state(state, current)) {
+				timeout = -ERESTARTSYS;
+				break;
+			}
+
+			if (!timeout) {
+				timeout = -ETIME;
+				break;
+			}
+
+			timeout = io_schedule_timeout(timeout);
+		} while (1);
+		finish_wait(&req->execute.wait, &exec);
+
 		if (timeout < 0)
 			goto complete;
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 06/15] drm/i915: Deconstruct execute fence
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (4 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 05/15] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 11:46 ` [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

On reflection, we are only using the execute fence as a waitqueue on the
global_seqno and not using it for dependency tracking between fences
(unlike the submit and dma fences). By only treating it as a waitqueue,
we can then treat it similar to the other waitqueues during submit,
making the code simpler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 50 ++++++++-------------------------
 drivers/gpu/drm/i915/i915_gem_request.h | 10 +------
 2 files changed, 13 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 88139dbf8fcc..477e8fc125ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence)
 	 * caught trying to reuse dead objects.
 	 */
 	i915_sw_fence_fini(&req->submit);
-	i915_sw_fence_fini(&req->execute);
 
 	kmem_cache_free(req->i915->requests, req);
 }
@@ -294,7 +293,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 	GEM_BUG_ON(!request->i915->gt.active_requests);
 
@@ -402,6 +400,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	struct intel_timeline *timeline;
 	u32 seqno;
 
+	trace_i915_gem_request_execute(request);
+
 	/* Transfer from per-context onto the global per-engine timeline */
 	timeline = engine->timeline;
 	GEM_BUG_ON(timeline == request->timeline);
@@ -426,8 +426,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	list_move_tail(&request->link, &timeline->requests);
 	spin_unlock(&request->timeline->lock);
 
-	i915_sw_fence_commit(&request->execute);
-	trace_i915_gem_request_execute(request);
+	wake_up_all(&request->execute);
 }
 
 void i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -463,24 +462,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static int __i915_sw_fence_call
-execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
-{
-	struct drm_i915_gem_request *request =
-		container_of(fence, typeof(*request), execute);
-
-	switch (state) {
-	case FENCE_COMPLETE:
-		break;
-
-	case FENCE_FREE:
-		i915_gem_request_put(request);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -573,13 +554,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
-	i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify);
-
-	/* Ensure that the execute fence completes after the submit fence -
-	 * as we complete the execute fence from within the submit fence
-	 * callback, its completion would otherwise be visible first.
-	 */
-	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
+	init_waitqueue_head(&req->execute);
 
 	i915_priotree_init(&req->priotree);
 
@@ -1032,6 +1007,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
 	DEFINE_WAIT(reset);
+	DEFINE_WAIT(exec);
 	struct intel_wait wait;
 
 	might_sleep();
@@ -1053,12 +1029,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	if (!i915_sw_fence_done(&req->execute)) {
-		DEFINE_WAIT(exec);
-
+	reset_wait_queue(&req->execute, &exec);
+	if (!req->global_seqno) {
 		do {
-			prepare_to_wait(&req->execute.wait, &exec, state);
-			if (i915_sw_fence_done(&req->execute))
+			set_current_state(state);
+			if (req->global_seqno)
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1081,15 +1056,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 			timeout = io_schedule_timeout(timeout);
 		} while (1);
-		finish_wait(&req->execute.wait, &exec);
+		finish_wait(&req->execute, &exec);
 
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!i915_sw_fence_done(&req->execute));
+		GEM_BUG_ON(!req->global_seqno);
 	}
-	GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
-	GEM_BUG_ON(!req->global_seqno);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 9049936c571c..467d3e13fce0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -119,18 +119,10 @@ struct drm_i915_gem_request {
 	 * The submit fence is used to await upon all of the request's
 	 * dependencies. When it is signaled, the request is ready to run.
 	 * It is used by the driver to then queue the request for execution.
-	 *
-	 * The execute fence is used to signal when the request has been
-	 * sent to hardware.
-	 *
-	 * It is illegal for the submit fence of one request to wait upon the
-	 * execute fence of an earlier request. It should be sufficient to
-	 * wait upon the submit fence of the earlier request.
 	 */
 	struct i915_sw_fence submit;
-	struct i915_sw_fence execute;
 	wait_queue_t submitq;
-	wait_queue_t execq;
+	wait_queue_head_t execute;
 
 	/* A list of everyone we wait upon, and everyone who waits upon us.
 	 * Even though we will not be submitted to the hardware before the
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (5 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 06/15] drm/i915: Deconstruct execute fence Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 12:29   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 08/15] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

A request is assigned a global seqno only when it is on the hardware
execution queue. The global seqno can be used to maintain a list of
requests on the same engine in retirement order, for example for
constructing a priority queue for waiting. Prior to its execution, or
if it is subsequently removed in the event of preemption, its global
seqno is zero. As both insertion and removal from the execution queue
may operate in IRQ context, it is not guarded by the usual struct_mutex
BKL. Instead those relying on the global seqno must be prepared for its
value to change between reads. Only when the request is complete can
the global seqno be stable (due to the memory barriers on submitting
the commands to the hardware to write the breadcrumb, if the HWS shows
that it has passed the global seqno and the global seqno is unchanged
after the read, it is indeed complete).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 16 ++++++--
 drivers/gpu/drm/i915/i915_gem.c          | 16 +++++---
 drivers/gpu/drm/i915/i915_gem_request.c  | 46 ++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_request.h  | 66 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
 5 files changed, 114 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 994929660027..3e4feeaeef60 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4022,14 +4022,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 }
 
 static inline bool
-__i915_request_irq_complete(struct drm_i915_gem_request *req)
+__i915_request_irq_complete(const struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
+	u32 seqno = i915_gem_request_global_seqno(req);
+
+	/* The request was dequeued before we were awoken. We check after
+	 * inspecting the hw to confirm that this was the same request
+	 * that generated the HWS update. The memory barriers within
+	 * the request execution are sufficient to ensure that a check
+	 * after reading the value from hw matches this request.
+	 */
+	if (!seqno)
+		return false;
 
 	/* Before we do the heavier coherent read of the seqno,
 	 * check the value (hopefully) in the CPU cacheline.
 	 */
-	if (__i915_gem_request_completed(req))
+	if (__i915_gem_request_completed(req, seqno))
 		return true;
 
 	/* Ensure our read of the seqno is coherent so that we
@@ -4080,7 +4090,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			wake_up_process(tsk);
 		rcu_read_unlock();
 
-		if (__i915_gem_request_completed(req))
+		if (__i915_gem_request_completed(req, seqno))
 			return true;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fce2cec8e665..f950cedb6516 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -397,7 +397,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 		i915_gem_request_retire_upto(rq);
 
-	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
+	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -2609,7 +2609,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *request;
+	struct drm_i915_gem_request *request, *active = NULL;
+	unsigned long flags;
 
 	/* We are called by the error capture and reset at a random
 	 * point in time. In particular, note that neither is crucially
@@ -2619,17 +2620,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * extra delay for a recent interrupt is pointless. Hence, we do
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
 	list_for_each_entry(request, &engine->timeline->requests, link) {
-		if (__i915_gem_request_completed(request))
+		if (__i915_gem_request_completed(request,
+						 request->global_seqno))
 			continue;
 
 		GEM_BUG_ON(request->engine != engine);
 		GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
 				    &request->fence.flags));
-		return request;
+
+		active = request;
+		break;
 	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 
-	return NULL;
+	return active;
 }
 
 static bool engine_stalled(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 477e8fc125ce..d18f450977e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 		intel_engine_enable_signaling(request);
 	spin_unlock(&request->lock);
 
-	GEM_BUG_ON(!request->global_seqno);
 	engine->emit_breadcrumb(request,
 				request->ring->vaddr + request->postfix);
 
@@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
-	if (req && __i915_gem_request_completed(req))
+	if (req && i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
 	/* Beware: Dragons be flying overhead.
@@ -611,6 +610,7 @@ static int
 i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			       struct drm_i915_gem_request *from)
 {
+	u32 seqno;
 	int ret;
 
 	GEM_BUG_ON(to == from);
@@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 		return ret < 0 ? ret : 0;
 	}
 
-	if (!from->global_seqno) {
+	seqno = i915_gem_request_global_seqno(from);
+	if (!seqno) {
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
 						    GFP_KERNEL);
 		return ret < 0 ? ret : 0;
 	}
 
-	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
+	if (seqno <= to->timeline->sync_seqno[from->engine->id])
 		return 0;
 
 	trace_i915_gem_ring_sync_to(to, from);
@@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}
 
-	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
+	to->timeline->sync_seqno[from->engine->id] = seqno;
 	return 0;
 }
 
@@ -939,7 +940,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *req,
-			 int state, unsigned long timeout_us)
+			 u32 seqno, int state, unsigned long timeout_us)
 {
 	struct intel_engine_cs *engine = req->engine;
 	unsigned int irq, cpu;
@@ -957,7 +958,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	irq = atomic_read(&engine->irq_count);
 	timeout_us += local_clock_us(&cpu);
 	do {
-		if (__i915_gem_request_completed(req))
+		if (seqno != i915_gem_request_global_seqno(req))
+			break;
+
+		if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
+				      seqno))
 			return true;
 
 		/* Seqno are meant to be ordered *before* the interrupt. If
@@ -1029,11 +1034,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
+	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+
 	reset_wait_queue(&req->execute, &exec);
-	if (!req->global_seqno) {
+	if (!wait.seqno) {
 		do {
 			set_current_state(state);
-			if (req->global_seqno)
+
+			wait.seqno = i915_gem_request_global_seqno(req);
+			if (wait.seqno)
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1061,7 +1070,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!req->global_seqno);
+		GEM_BUG_ON(!wait.seqno);
 	}
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
@@ -1070,7 +1079,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
@@ -1091,7 +1099,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 		timeout = io_schedule_timeout(timeout);
 
-		if (intel_wait_complete(&wait))
+		if (intel_wait_complete(&wait) &&
+		    i915_gem_request_global_seqno(req) == wait.seqno)
 			break;
 
 		set_current_state(state);
@@ -1142,14 +1151,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 static void engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
+	u32 seqno = intel_engine_get_seqno(engine);
+	LIST_HEAD(retire);
 
+	spin_lock_irq(&engine->timeline->lock);
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
-		if (!__i915_gem_request_completed(request))
-			return;
+		if (!i915_seqno_passed(seqno, request->global_seqno))
+			break;
 
-		i915_gem_request_retire(request);
+		list_move_tail(&request->link, &retire);
 	}
+	spin_unlock_irq(&engine->timeline->lock);
+
+	list_for_each_entry_safe(request, next, &retire, link)
+		i915_gem_request_retire(request);
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 467d3e13fce0..b81f6709905c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -135,6 +135,11 @@ struct drm_i915_gem_request {
 	struct i915_priotree priotree;
 	struct i915_dependency dep;
 
+	/** GEM sequence number associated with this request on the
+	 * global execution timeline. It is zero when the request is not
+	 * on the HW queue (i.e. not on the engine timeline list).
+	 * Its value is guarded by the timeline spinlock.
+	 */
 	u32 global_seqno;
 
 	/** Position in the ring of the start of the request */
@@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+/**
+ * i915_gem_request_global_seqno - report the current global seqno
+ * @request - the request
+ *
+ * A request is assigned a global seqno only when it is on the hardware
+ * execution queue. The global seqno can be used to maintain a list of
+ * requests on the same engine in retirement order, for example for
+ * constructing a priority queue for waiting. Prior to its execution, or
+ * if it is subsequently removed in the event of preemption, its global
+ * seqno is zero. As both insertion and removal from the execution queue
+ * may operate in IRQ context, it is not guarded by the usual struct_mutex
+ * BKL. Instead those relying on the global seqno must be prepared for its
+ * value to change between reads. Only when the request is complete can
+ * the global seqno be stable (due to the memory barriers on submitting
+ * the commands to the hardware to write the breadcrumb, if the HWS shows
+ * that it has passed the global seqno and the global seqno is unchanged
+ * after the read, it is indeed complete).
+ */
+static u32
+i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
+{
+	return READ_ONCE(request->global_seqno);
+}
+
 int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
@@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 }
 
 static inline bool
-__i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
 {
-	GEM_BUG_ON(!req->global_seqno);
+	GEM_BUG_ON(!seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->global_seqno - 1);
+				 seqno - 1);
 }
 
 static inline bool
 i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
-	if (!req->global_seqno)
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(req);
+	if (!seqno)
 		return false;
 
-	return __i915_gem_request_started(req);
+	return __i915_gem_request_started(req, seqno);
 }
 
 static inline bool
-__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
 {
-	GEM_BUG_ON(!req->global_seqno);
-	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->global_seqno);
+	GEM_BUG_ON(!seqno);
+	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
+		seqno == i915_gem_request_global_seqno(req);
 }
 
 static inline bool
 i915_gem_request_completed(const struct drm_i915_gem_request *req)
 {
-	if (!req->global_seqno)
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(req);
+	if (!seqno)
 		return false;
 
-	return __i915_gem_request_completed(req);
+	return __i915_gem_request_completed(req, seqno);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
-			 int state, unsigned long timeout_us);
+			 u32 seqno, int state, unsigned long timeout_us);
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
 				     int state, unsigned long timeout_us)
 {
-	return (__i915_gem_request_started(request) &&
-		__i915_spin_request(request, state, timeout_us));
+	u32 seqno;
+
+	seqno = i915_gem_request_global_seqno(request);
+	return (__i915_gem_request_started(request, seqno) &&
+		__i915_spin_request(request, seqno, state, timeout_us));
 }
 
 /* We treat requests as fences. This is not be to confused with our
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 4f4e703d1b14..d5bf4c0b2deb 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	struct rb_node *parent, **p;
 	bool first, wakeup;
+	u32 seqno;
 
 	/* Note that we may be called from an interrupt handler on another
 	 * device (e.g. nouveau signaling a fence completion causing us
@@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 
 	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
 	assert_spin_locked(&request->lock);
-	if (!request->global_seqno)
+
+	seqno = i915_gem_request_global_seqno(request);
+	if (!seqno)
 		return;
 
 	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.seqno = request->global_seqno;
+	request->signaling.wait.seqno = seqno;
 	i915_gem_request_get(request);
 
 	spin_lock(&b->lock);
@@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	p = &b->signals.rb_node;
 	while (*p) {
 		parent = *p;
-		if (i915_seqno_passed(request->global_seqno,
-				      to_signaler(parent)->global_seqno)) {
+		if (i915_seqno_passed(seqno,
+				      to_signaler(parent)->signaling.wait.seqno)) {
 			p = &parent->rb_right;
 			first = false;
 		} else {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 08/15] drm/i915: Take a reference whilst processing the signaler request
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (6 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 12:35   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 09/15] drm/i915: Allow a request to be cancelled Chris Wilson
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

The plan in the near future is to allow requests to be removed from the
signaler. We will then no longer be able to rely on holding a reference
to the request for the duration it is in the signaling tree, and instead
must obtain a reference to the request for the current operation using
RCU.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 24 ++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index d5bf4c0b2deb..62e6b8181200 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -496,7 +496,11 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * need to wait for a new interrupt from the GPU or for
 		 * a new client.
 		 */
-		request = READ_ONCE(b->first_signal);
+		rcu_read_lock();
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+		rcu_read_unlock();
 		if (signal_complete(request)) {
 			local_bh_disable();
 			dma_fence_signal(&request->fence);
@@ -515,24 +519,28 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 * the oldest before picking the next one.
 			 */
 			spin_lock_irq(&b->lock);
-			if (request == b->first_signal) {
+			if (request == rcu_access_pointer(b->first_signal)) {
 				struct rb_node *rb =
 					rb_next(&request->signaling.node);
-				b->first_signal = rb ? to_signaler(rb) : NULL;
+				rcu_assign_pointer(b->first_signal,
+						   rb ? to_signaler(rb) : NULL);
 			}
 			rb_erase(&request->signaling.node, &b->signals);
 			spin_unlock_irq(&b->lock);
 
 			i915_gem_request_put(request);
 		} else {
-			if (kthread_should_stop())
+			if (kthread_should_stop()) {
+				GEM_BUG_ON(request);
 				break;
+			}
 
 			schedule();
 
 			if (kthread_should_park())
 				kthread_parkme();
 		}
+		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -597,7 +605,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	rb_link_node(&request->signaling.node, parent, p);
 	rb_insert_color(&request->signaling.node, &b->signals);
 	if (first)
-		smp_store_mb(b->first_signal, request);
+		rcu_assign_pointer(b->first_signal, request);
 
 	spin_unlock(&b->lock);
 
@@ -670,7 +678,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	/* The engines should be idle and all requests accounted for! */
 	WARN_ON(READ_ONCE(b->first_wait));
 	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(READ_ONCE(b->first_signal));
+	WARN_ON(rcu_access_pointer(b->first_signal));
 	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
 
 	if (!IS_ERR_OR_NULL(b->signaler))
@@ -686,8 +694,8 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&b->lock);
 
-	if (b->first_wait) {
-		wake_up_process(b->first_wait->tsk);
+	if (rcu_access_pointer(b->first_signal)) {
+		wake_up_process(b->signaler);
 		busy |= intel_engine_flag(engine);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b91c2c7ef5a6..3fcb9dd19b07 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -242,7 +242,7 @@ struct intel_engine_cs {
 		struct rb_root signals; /* sorted by retirement */
 		struct intel_wait *first_wait; /* oldest waiter by retirement */
 		struct task_struct *signaler; /* used for fence signalling */
-		struct drm_i915_gem_request *first_signal;
+		struct drm_i915_gem_request __rcu *first_signal;
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 09/15] drm/i915: Allow a request to be cancelled
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (7 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 08/15] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 13:08   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue Chris Wilson
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

If we preempt a request and remove it from the execution queue, we need
to undo its global seqno and restart any waiters.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 71 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 62e6b8181200..882e601ebb09 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -356,22 +356,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b,
 		return tsk->prio;
 }
 
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-			      struct intel_wait *wait)
+static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
+				       struct intel_wait *wait)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	/* Quick check to see if this waiter was already decoupled from
-	 * the tree by the bottom-half to avoid contention on the spinlock
-	 * by the herd.
-	 */
-	if (RB_EMPTY_NODE(&wait->node))
-		return;
-
-	spin_lock_irq(&b->lock);
+	assert_spin_locked(&b->lock);
 
 	if (RB_EMPTY_NODE(&wait->node))
-		goto out_unlock;
+		goto out;
 
 	if (b->first_wait == wait) {
 		const int priority = wakeup_priority(b, wait->tsk);
@@ -436,11 +429,27 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
 	rb_erase(&wait->node, &b->waiters);
 
-out_unlock:
+out:
 	GEM_BUG_ON(b->first_wait == wait);
 	GEM_BUG_ON(rb_first(&b->waiters) !=
 		   (b->first_wait ? &b->first_wait->node : NULL));
 	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
+}
+
+void intel_engine_remove_wait(struct intel_engine_cs *engine,
+			      struct intel_wait *wait)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	/* Quick check to see if this waiter was already decoupled from
+	 * the tree by the bottom-half to avoid contention on the spinlock
+	 * by the herd.
+	 */
+	if (RB_EMPTY_NODE(&wait->node))
+		return;
+
+	spin_lock_irq(&b->lock);
+	__intel_engine_remove_wait(engine, wait);
 	spin_unlock_irq(&b->lock);
 }
 
@@ -506,11 +515,13 @@ static int intel_breadcrumbs_signaler(void *arg)
 			dma_fence_signal(&request->fence);
 			local_bh_enable(); /* kick start the tasklets */
 
+			spin_lock_irq(&b->lock);
+
 			/* Wake up all other completed waiters and select the
 			 * next bottom-half for the next user interrupt.
 			 */
-			intel_engine_remove_wait(engine,
-						 &request->signaling.wait);
+			__intel_engine_remove_wait(engine,
+						   &request->signaling.wait);
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
@@ -518,7 +529,6 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 * we just completed - so double check we are still
 			 * the oldest before picking the next one.
 			 */
-			spin_lock_irq(&b->lock);
 			if (request == rcu_access_pointer(b->first_signal)) {
 				struct rb_node *rb =
 					rb_next(&request->signaling.node);
@@ -526,6 +536,8 @@ static int intel_breadcrumbs_signaler(void *arg)
 						   rb ? to_signaler(rb) : NULL);
 			}
 			rb_erase(&request->signaling.node, &b->signals);
+			RB_CLEAR_NODE(&request->signaling.node);
+
 			spin_unlock_irq(&b->lock);
 
 			i915_gem_request_put(request);
@@ -613,6 +625,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 		wake_up_process(b->signaler);
 }
 
+void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	assert_spin_locked(&request->lock);
+	GEM_BUG_ON(!request->signaling.wait.seqno);
+
+	spin_lock(&b->lock);
+
+	if (!RB_EMPTY_NODE(&request->signaling.node)) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb =
+				rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+		rb_erase(&request->signaling.node, &b->signals);
+		RB_CLEAR_NODE(&request->signaling.node);
+	}
+
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	spin_unlock(&b->lock);
+
+	request->signaling.wait.seqno = 0;
+	i915_gem_request_put(request);
+}
+
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3fcb9dd19b07..45d2c2fa946e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -598,6 +598,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
 void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
+void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
 {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (8 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 09/15] drm/i915: Allow a request to be cancelled Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 13:33   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
                   ` (6 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

After the request is cancelled, we then need to remove it from the
global execution timeline and return it to the context timeline, the
inverse of submit_request().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c            | 58 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h            |  3 ++
 drivers/gpu/drm/i915/intel_breadcrumbs.c           | 19 ++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h            |  6 ---
 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c |  6 +++
 5 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index d18f450977e0..97116e492d01 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_timeline *timeline;
+
+	assert_spin_locked(&engine->timeline->lock);
+
+	/* Only unwind in reverse order, required so that the per-context list
+	 * is kept in seqno/ring order.
+	 */
+	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+	engine->timeline->seqno--;
+
+	/* We may be recursing from the signal callback of another i915 fence */
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	request->global_seqno = 0;
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
+	spin_unlock(&request->lock);
+
+	/* Transfer back from the global per-engine timeline to per-context */
+	timeline = request->timeline;
+	GEM_BUG_ON(timeline == engine->timeline);
+
+	spin_lock(&timeline->lock);
+	list_move(&request->link, &timeline->requests);
+	spin_unlock(&timeline->lock);
+
+	/* We don't need to wake_up any waiters on request->execute, they
+	 * will get woken by any other event or us re-adding this request
+	 * to the engine timeline (__i915_gem_request_submit()). The waiters
+	 * should be quite adept at finding that the request now has a new
+	 * global_seqno compared to the one they went to sleep on.
+	 */
+}
+
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+
+	__i915_gem_request_unsubmit(request);
+
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+	wait.tsk = current;
 
+restart:
 	reset_wait_queue(&req->execute, &exec);
+	wait.seqno = i915_gem_request_global_seqno(req);
 	if (!wait.seqno) {
 		do {
 			set_current_state(state);
@@ -1135,6 +1186,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
+
+		if (i915_gem_request_global_seqno(req) != wait.seqno) {
+			intel_engine_remove_wait(req->engine, &wait);
+			goto restart;
+		}
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index b81f6709905c..5f73d8c0a38a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 void __i915_gem_request_submit(struct drm_i915_gem_request *request);
 void i915_gem_request_submit(struct drm_i915_gem_request *request);
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 882e601ebb09..5bcad7872c08 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -453,7 +453,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->lock);
 }
 
-static bool signal_complete(struct drm_i915_gem_request *request)
+static bool signal_valid(const struct drm_i915_gem_request *request)
+{
+	u32 seqno = READ_ONCE(request->global_seqno);
+
+	return seqno == request->signaling.wait.seqno;
+}
+
+static bool signal_complete(const struct drm_i915_gem_request *request)
 {
 	if (!request)
 		return false;
@@ -462,7 +469,7 @@ static bool signal_complete(struct drm_i915_gem_request *request)
 	 * signalled that this wait is already completed.
 	 */
 	if (intel_wait_complete(&request->signaling.wait))
-		return true;
+		return signal_valid(request);
 
 	/* Carefully check if the request is complete, giving time for the
 	 * seqno to be visible or if the GPU hung.
@@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 
 			i915_gem_request_put(request);
 		} else {
+			DEFINE_WAIT(exec);
+
 			if (kthread_should_stop()) {
 				GEM_BUG_ON(request);
 				break;
 			}
 
+			if (request)
+				add_wait_queue(&request->execute, &exec);
+
 			schedule();
 
+			if (request)
+				remove_wait_queue(&request->execute, &exec);
+
 			if (kthread_should_park())
 				kthread_parkme();
 		}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 45d2c2fa946e..97fde79167a6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -582,12 +582,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
-{
-	wait->tsk = current;
-	wait->seqno = seqno;
-}
-
 static inline bool intel_wait_complete(const struct intel_wait *wait)
 {
 	return RB_EMPTY_NODE(&wait->node);
diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
index 6426acc9fdca..62c020c7ea80 100644
--- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
@@ -28,6 +28,12 @@
 #include "mock_gem_device.h"
 #include "mock_engine.h"
 
+static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
+{
+	wait->tsk = current;
+	wait->seqno = seqno;
+}
+
 static int check_rbtree(struct intel_engine_cs *engine,
 			const unsigned long *bitmap,
 			const struct intel_wait *waiters,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (9 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 13:46   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 12/15] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
                   ` (5 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

Add a mock selftest to preempt a request and check that we cancel it,
requeue the request and then complete its execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/i915_gem_request.c | 59 +++++++++++++++++++++++
 drivers/gpu/drm/i915/selftests/mock_request.c     | 19 ++++++++
 drivers/gpu/drm/i915/selftests/mock_request.h     |  2 +
 3 files changed, 80 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 9d056a86723d..c803ef60a127 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -26,6 +26,7 @@
 
 #include "../i915_selftest.h"
 
+#include "mock_context.h"
 #include "mock_gem_device.h"
 
 static int igt_add_request(void *arg)
@@ -181,12 +182,70 @@ static int igt_fence_wait(void *arg)
 	return err;
 }
 
+static int igt_request_rewind(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_request *request, *vip;
+	struct i915_gem_context *ctx[2];
+	int err = -EINVAL;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx[0] = mock_context(i915, "A");
+	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
+	if (!request) {
+		err = -ENOMEM;
+		goto err_device;
+	}
+	i915_add_request(request);
+
+	ctx[1] = mock_context(i915, "B");
+	vip = mock_request(i915->engine[RCS], ctx[1], 0);
+	if (!vip) {
+		err = -ENOMEM;
+		goto err_locked;
+	}
+
+	/* Simulate preemption by manual reordering */
+	if (!mock_cancel_request(request)) {
+		pr_err("failed to cancel request (already executed)!\n");
+		i915_add_request(vip);
+		goto err_locked;
+	}
+	i915_add_request(vip);
+	request->engine->submit_request(request);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (i915_wait_request(vip, 0, HZ) == -ETIME) {
+		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
+		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
+		goto err;
+	}
+
+	if (i915_gem_request_completed(request)) {
+		pr_err("low priority request already completed\n");
+		goto err;
+	}
+
+	err = 0;
+err:
+	mutex_lock(&i915->drm.struct_mutex);
+err_locked:
+	mock_context_close(ctx[1]);
+	mock_context_close(ctx[0]);
+err_device:
+	mock_device_flush(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int i915_gem_request_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_add_request),
 		SUBTEST(igt_wait_request),
 		SUBTEST(igt_fence_wait),
+		SUBTEST(igt_request_rewind),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index e23242d1b88a..0e8d2e7f8c70 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include "mock_engine.h"
 #include "mock_request.h"
 
 struct drm_i915_gem_request *
@@ -42,3 +43,21 @@ mock_request(struct intel_engine_cs *engine,
 
 	return &mock->base;
 }
+
+bool mock_cancel_request(struct drm_i915_gem_request *request)
+{
+	struct mock_request *mock = container_of(request, typeof(*mock), base);
+	struct mock_engine *engine =
+		container_of(request->engine, typeof(*engine), base);
+	bool was_queued;
+
+	spin_lock_irq(&engine->hw_lock);
+	was_queued = !list_empty(&mock->link);
+	list_del_init(&mock->link);
+	spin_unlock_irq(&engine->hw_lock);
+
+	if (was_queued)
+		i915_gem_request_unsubmit(request);
+
+	return was_queued;
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
index cc76d4f4eb4e..4dea74c8e96d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.h
+++ b/drivers/gpu/drm/i915/selftests/mock_request.h
@@ -41,4 +41,6 @@ mock_request(struct intel_engine_cs *engine,
 	     struct i915_gem_context *context,
 	     unsigned long delay);
 
+bool mock_cancel_request(struct drm_i915_gem_request *request);
+
 #endif /* !__MOCK_REQUEST__ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 12/15] drm/i915: Replace reset_wait_queue with default_wake_function
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (10 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 14:13   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
                   ` (4 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

If we change the wait_queue_t from using the autoremove_wake_function to
the default_wake_function, we no longer have to restore the wait_queue_t
entry on the wait_queue_head_t list after being woken up by it, as we
are unusual in sleeping multiple times on the same wait_queue_t.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 97116e492d01..42250eaf56ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -946,16 +946,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
-static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list))
-		__add_wait_queue(q, wait);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-
 static unsigned long local_clock_us(unsigned int *cpu)
 {
 	unsigned long t;
@@ -1060,8 +1050,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
-	DEFINE_WAIT(reset);
-	DEFINE_WAIT(exec);
+	DEFINE_WAIT_FUNC(reset, default_wake_function);
+	DEFINE_WAIT_FUNC(exec, default_wake_function);
 	struct intel_wait wait;
 
 	might_sleep();
@@ -1080,13 +1070,13 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req, flags);
 
+	add_wait_queue(&req->execute, &exec);
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
 	wait.tsk = current;
 
 restart:
-	reset_wait_queue(&req->execute, &exec);
 	wait.seqno = i915_gem_request_global_seqno(req);
 	if (!wait.seqno) {
 		do {
@@ -1100,26 +1090,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 			    i915_reset_in_progress(&req->i915->gpu_error)) {
 				__set_current_state(TASK_RUNNING);
 				i915_reset(req->i915);
-				reset_wait_queue(errq, &reset);
 				continue;
 			}
 
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
-				break;
+				goto complete;
 			}
 
 			if (!timeout) {
 				timeout = -ETIME;
-				break;
+				goto complete;
 			}
 
 			timeout = io_schedule_timeout(timeout);
 		} while (1);
-		finish_wait(&req->execute, &exec);
-
-		if (timeout < 0)
-			goto complete;
 
 		GEM_BUG_ON(!wait.seqno);
 	}
@@ -1179,7 +1164,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
 			i915_reset(req->i915);
-			reset_wait_queue(errq, &reset);
 			continue;
 		}
 
@@ -1194,11 +1178,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
-	__set_current_state(TASK_RUNNING);
-
 complete:
+	__set_current_state(TASK_RUNNING);
 	if (flags & I915_WAIT_LOCKED)
 		remove_wait_queue(errq, &reset);
+	remove_wait_queue(&req->execute, &exec);
 	trace_i915_gem_request_wait_end(req);
 
 	return timeout;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (11 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 12/15] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 14:16   ` Tvrtko Ursulin
  2017-02-22 11:46 ` [PATCH v2 14/15] drm/i915: Immediately process a reset before starting waiting Chris Wilson
                   ` (3 subsequent siblings)
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

Combine the common code for the pair of waiters into a single function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 42250eaf56ec..66d292384e60 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1024,6 +1024,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
+static bool __i915_reset_request(struct drm_i915_gem_request *request)
+{
+	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
+		return false;
+
+	__set_current_state(TASK_RUNNING);
+	i915_reset(request->i915);
+	return true;
+}
+
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: the request to wait upon
@@ -1087,11 +1097,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
-			    i915_reset_in_progress(&req->i915->gpu_error)) {
-				__set_current_state(TASK_RUNNING);
-				i915_reset(req->i915);
+			    __i915_reset_request(req))
 				continue;
-			}
 
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
@@ -1160,12 +1167,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		 * to come along and update the breadcrumb (either directly
 		 * itself, or indirectly by recovering the GPU).
 		 */
-		if (flags & I915_WAIT_LOCKED &&
-		    i915_reset_in_progress(&req->i915->gpu_error)) {
-			__set_current_state(TASK_RUNNING);
-			i915_reset(req->i915);
+		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
 			continue;
-		}
 
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 14/15] drm/i915: Immediately process a reset before starting waiting
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (12 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 11:46 ` [PATCH v2 15/15] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

As we handoff the GPU reset to the waiter, we need to check we don't
miss a wakeup if it has already been sent prior to us starting the wait.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 66d292384e60..00ec3dbca184 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1081,8 +1081,10 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	trace_i915_gem_request_wait_begin(req, flags);
 
 	add_wait_queue(&req->execute, &exec);
-	if (flags & I915_WAIT_LOCKED)
+	if (flags & I915_WAIT_LOCKED) {
 		add_wait_queue(errq, &reset);
+		__i915_reset_request(req);
+	}
 
 	wait.tsk = current;
 
@@ -1122,7 +1124,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	if (intel_engine_add_wait(req->engine, &wait))
+	if (intel_engine_add_wait(req->engine, &wait) ||
+	    flags & I915_WAIT_LOCKED)
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
 		 * coherent check on the seqno before we sleep.
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH v2 15/15] drm/i915: Remove one level of indention from wait-for-execute
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (13 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 14/15] drm/i915: Immediately process a reset before starting waiting Chris Wilson
@ 2017-02-22 11:46 ` Chris Wilson
  2017-02-22 14:22   ` Tvrtko Ursulin
  2017-02-22 13:22 ` ✗ Fi.CI.BAT: failure for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine Patchwork
  2017-02-22 20:52 ` ✗ Fi.CI.BAT: warning for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2) Patchwork
  16 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 11:46 UTC (permalink / raw)
  To: intel-gfx

Now that the code is getting simpler, we can reduce the indentation when
waiting for the global_seqno.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 41 +++++++++++++++------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 00ec3dbca184..d7e72aed1bd3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1089,34 +1089,29 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	wait.tsk = current;
 
 restart:
-	wait.seqno = i915_gem_request_global_seqno(req);
-	if (!wait.seqno) {
-		do {
-			set_current_state(state);
-
-			wait.seqno = i915_gem_request_global_seqno(req);
-			if (wait.seqno)
-				break;
+	do {
+		set_current_state(state);
+		wait.seqno = i915_gem_request_global_seqno(req);
+		if (wait.seqno)
+			break;
 
-			if (flags & I915_WAIT_LOCKED &&
-			    __i915_reset_request(req))
-				continue;
+		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
+			continue;
 
-			if (signal_pending_state(state, current)) {
-				timeout = -ERESTARTSYS;
-				goto complete;
-			}
+		if (signal_pending_state(state, current)) {
+			timeout = -ERESTARTSYS;
+			goto complete;
+		}
 
-			if (!timeout) {
-				timeout = -ETIME;
-				goto complete;
-			}
+		if (!timeout) {
+			timeout = -ETIME;
+			goto complete;
+		}
 
-			timeout = io_schedule_timeout(timeout);
-		} while (1);
+		timeout = io_schedule_timeout(timeout);
+	} while (1);
 
-		GEM_BUG_ON(!wait.seqno);
-	}
+	GEM_BUG_ON(!wait.seqno);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
 	/* Optimistic short spin before touching IRQs */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 02/15] drm/i915: Move reeerve_seqno() next to unreserve_seqno()
  2017-02-22 11:45 ` [PATCH v2 02/15] drm/i915: Move reeerve_seqno() next to unreserve_seqno() Chris Wilson
@ 2017-02-22 12:23   ` Joonas Lahtinen
  0 siblings, 0 replies; 39+ messages in thread
From: Joonas Lahtinen @ 2017-02-22 12:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Patch title s/reeerve/reserve/.

On ke, 2017-02-22 at 11:45 +0000, Chris Wilson wrote:
> Move the companion functions next to each other.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine
  2017-02-22 11:45 ` [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine Chris Wilson
@ 2017-02-22 12:24   ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 12:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:45, Chris Wilson wrote:
> Replace the global device seqno with one for each engine, and account
> for in-flight seqno on each separately. This is consistent with
> dma-fence as each timeline has separate fence-contexts for each engine
> and a seqno is only ordered within a fence-context (i.e.  seqno do not
> need to be ordered wrt to other engines, just ordered within a single
> engine). This is required to enable request rewinding for preemption on
> individual engines (we have to rewind the global seqno to avoid
> overflow, and we do not have to rewind all engines just to preempt one.)
>
> v2: Rename active_seqno to inflight_seqnos to more clearly indicate that
> it is a counter and not equivalent to the existing seqno. Update
> functions that operated on active_seqno similarly.

Missed a couple details but never mind.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      | 11 +----
>  drivers/gpu/drm/i915/i915_gem_request.c  | 83 +++++++++++++++++---------------
>  drivers/gpu/drm/i915/i915_gem_request.h  |  8 +--
>  drivers/gpu/drm/i915/i915_gem_timeline.h |  9 +++-
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 ++++++-------
>  drivers/gpu/drm/i915/intel_engine_cs.c   |  2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +-
>  7 files changed, 70 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 655e60d609c2..1a28b5279bec 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1080,15 +1080,6 @@ static const struct file_operations i915_error_state_fops = {
>  #endif
>
>  static int
> -i915_next_seqno_get(void *data, u64 *val)
> -{
> -	struct drm_i915_private *dev_priv = data;
> -
> -	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
> -	return 0;
> -}
> -
> -static int
>  i915_next_seqno_set(void *data, u64 val)
>  {
>  	struct drm_i915_private *dev_priv = data;
> @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val)
>  }
>
>  DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
> -			i915_next_seqno_get, i915_next_seqno_set,
> +			NULL, i915_next_seqno_set,
>  			"0x%llx\n");
>
>  static int i915_frequency_info(struct seq_file *m, void *unused)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 51bc24d1d7c9..e8b354cf2f06 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt)
>  	pt->priority = INT_MIN;
>  }
>
> +static void unreserve_seqno(struct intel_engine_cs *engine)
> +{
> +	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
> +	engine->timeline->inflight_seqnos--;
> +}
> +
>  void i915_gem_retire_noop(struct i915_gem_active *active,
>  			  struct drm_i915_gem_request *request)
>  {
> @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  				 &request->i915->gt.idle_work,
>  				 msecs_to_jiffies(100));
>  	}
> +	unreserve_seqno(request->engine);
>
>  	/* Walk through the active list, calling retire on each. This allows
>  	 * objects to track their GPU activity and mark themselves as idle
> @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
>  	} while (tmp != req);
>  }
>
> -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
> +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
>  {
>  	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
>  	struct intel_engine_cs *engine;
> @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
>  	GEM_BUG_ON(i915->gt.active_requests > 1);
>
>  	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> -	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
> -		while (intel_breadcrumbs_busy(i915))
> -			cond_resched(); /* spin until threads are complete */
> -	}
> -	atomic_set(&timeline->seqno, seqno);
> +	for_each_engine(engine, i915, id) {
> +		struct intel_timeline *tl = &timeline->engine[id];
>
> -	/* Finally reset hw state */
> -	for_each_engine(engine, i915, id)
> +		if (!i915_seqno_passed(seqno, tl->seqno)) {
> +			/* spin until threads are complete */
> +			while (intel_breadcrumbs_busy(engine))
> +				cond_resched();
> +		}
> +
> +		/* Finally reset hw state */
> +		tl->seqno = seqno;
>  		intel_engine_init_global_seqno(engine, seqno);
> +	}
>
>  	list_for_each_entry(timeline, &i915->gt.timelines, link) {
>  		for_each_engine(engine, i915, id) {
> @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
>  	/* HWS page needs to be set less than what we
>  	 * will inject to ring
>  	 */
> -	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
> +	return reset_all_global_seqno(dev_priv, seqno - 1);
>  }
>
> -static int reserve_global_seqno(struct drm_i915_private *i915)
> +static int reserve_seqno(struct intel_engine_cs *engine)
>  {
> -	u32 active_requests = ++i915->gt.active_requests;
> -	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
> +	u32 active = ++engine->timeline->inflight_seqnos;
> +	u32 seqno = engine->timeline->seqno;
>  	int ret;
>
>  	/* Reservation is fine until we need to wrap around */
> -	if (likely(seqno + active_requests > seqno))
> +	if (likely(!add_overflows(seqno, active)))
>  		return 0;
>
> -	ret = i915_gem_init_global_seqno(i915, 0);
> +	/* Even though we are tracking inflight seqno individually on each
> +	 * engine, other engines may be observing us using hw semaphores and
> +	 * so we need to idle all engines before wrapping around this engine.
> +	 * As all engines are then idle, we can reset the seqno on all, so
> +	 * we don't stall in quick succession if each engine is being
> +	 * similarly utilized.
> +	 */
> +	ret = reset_all_global_seqno(engine->i915, 0);
>  	if (ret) {
> -		i915->gt.active_requests--;
> +		engine->timeline->inflight_seqnos--;
>  		return ret;
>  	}
>
>  	return 0;
>  }
>
> -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
> -{
> -	/* seqno only incremented under a mutex */
> -	return ++tl->seqno.counter;
> -}
> -
> -static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
> +static u32 timeline_get_seqno(struct intel_timeline *tl)
>  {
> -	return atomic_inc_return(&tl->seqno);
> +	return ++tl->seqno;
>  }
>
>  void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
>  	GEM_BUG_ON(timeline == request->timeline);
>  	assert_spin_locked(&timeline->lock);
>
> -	seqno = timeline_get_seqno(timeline->common);
> +	seqno = timeline_get_seqno(timeline);
>  	GEM_BUG_ON(!seqno);
>  	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
>
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
> -	request->previous_seqno = timeline->last_submitted_seqno;
> -	timeline->last_submitted_seqno = seqno;
> -
>  	/* We may be recursing from the signal callback of another i915 fence */
>  	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>  	request->global_seqno = seqno;
> @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	if (ret)
>  		return ERR_PTR(ret);
>
> -	ret = reserve_global_seqno(dev_priv);
> +	ret = reserve_seqno(engine);
>  	if (ret)
>  		goto err_unpin;
>
> @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  		       &i915_fence_ops,
>  		       &req->lock,
>  		       req->timeline->fence_context,
> -		       __timeline_get_seqno(req->timeline->common));
> +		       timeline_get_seqno(req->timeline));
>
>  	/* We bump the ref for the fence chain */
>  	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
> @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	 */
>  	req->head = req->ring->tail;
>
> +	/* Check that we didn't interrupt ourselves with a new request */
> +	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
>  	return req;
>
>  err_ctx:
> @@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
>  	kmem_cache_free(dev_priv->requests, req);
>  err_unreserve:
> -	dev_priv->gt.active_requests--;
> +	unreserve_seqno(engine);
>  err_unpin:
>  	engine->context_unpin(engine, ctx);
>  	return ERR_PTR(ret);
> @@ -837,8 +847,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	 * our i915_gem_request_alloc() and called __i915_add_request() before
>  	 * us, the timeline will hold its seqno which is later than ours.
>  	 */
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> -				     request->fence.seqno));
> +	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
>
>  	/*
>  	 * To ensure that this call will not fail, space for its emissions
> @@ -892,16 +901,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	list_add_tail(&request->link, &timeline->requests);
>  	spin_unlock_irq(&timeline->lock);
>
> -	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
> -				     request->fence.seqno));
> -
> -	timeline->last_submitted_seqno = request->fence.seqno;
> +	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
>  	i915_gem_active_set(&timeline->last_request, request);
>
>  	list_add_tail(&request->ring_link, &ring->request_list);
>  	request->emitted_jiffies = jiffies;
>
> -	i915_gem_mark_busy(engine);
> +	if (!request->i915->gt.active_requests++)
> +		i915_gem_mark_busy(engine);
>
>  	/* Let the backend know a new request has arrived that may need
>  	 * to adjust the existing execution schedule due to a high priority
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index ea511f06efaf..9049936c571c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -145,12 +145,6 @@ struct drm_i915_gem_request {
>
>  	u32 global_seqno;
>
> -	/** GEM sequence number associated with the previous request,
> -	 * when the HWS breadcrumb is equal to this the GPU is processing
> -	 * this request.
> -	 */
> -	u32 previous_seqno;
> -
>  	/** Position in the ring of the start of the request */
>  	u32 head;
>
> @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
>  {
>  	GEM_BUG_ON(!req->global_seqno);
>  	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->previous_seqno);
> +				 req->global_seqno - 1);
>  }
>
>  static inline bool
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
> index f2e51f42cc2f..6c53e14cab2a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
> @@ -33,7 +33,13 @@ struct i915_gem_timeline;
>
>  struct intel_timeline {
>  	u64 fence_context;
> -	u32 last_submitted_seqno;
> +	u32 seqno;
> +
> +	/**
> +	 * Count of outstanding requests, from the time they are constructed
> +	 * to the moment they are retired. Loosely coupled to hardware.
> +	 */
> +	u32 inflight_seqnos;
>
>  	spinlock_t lock;
>
> @@ -56,7 +62,6 @@ struct intel_timeline {
>
>  struct i915_gem_timeline {
>  	struct list_head link;
> -	atomic_t seqno;
>
>  	struct drm_i915_private *i915;
>  	const char *name;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 5932e2dc0c6f..4f4e703d1b14 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>  	cancel_fake_irq(engine);
>  }
>
> -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
> +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
>  {
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	unsigned int mask = 0;
> -
> -	for_each_engine(engine, i915, id) {
> -		struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> -		spin_lock_irq(&b->lock);
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	bool busy = false;
>
> -		if (b->first_wait) {
> -			wake_up_process(b->first_wait->tsk);
> -			mask |= intel_engine_flag(engine);
> -		}
> +	spin_lock_irq(&b->lock);
>
> -		if (b->first_signal) {
> -			wake_up_process(b->signaler);
> -			mask |= intel_engine_flag(engine);
> -		}
> +	if (b->first_wait) {
> +		wake_up_process(b->first_wait->tsk);
> +		busy |= intel_engine_flag(engine);
> +	}
>
> -		spin_unlock_irq(&b->lock);
> +	if (b->first_signal) {
> +		wake_up_process(b->signaler);
> +		busy |= intel_engine_flag(engine);
>  	}
>
> -	return mask;
> +	spin_unlock_irq(&b->lock);
> +
> +	return busy;
>  }
>
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 46c94740be53..c4d4698f98e7 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
>  		engine->irq_seqno_barrier(engine);
>
>  	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
> -	engine->timeline->last_submitted_seqno = seqno;
> -
>  	engine->hangcheck.seqno = seqno;
>
>  	/* After manually advancing the seqno, fake the interrupt in case
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4091c97be6ec..b91c2c7ef5a6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
>  	 * wtih serialising this hint with anything, so document it as
>  	 * a hint and nothing more.
>  	 */
> -	return READ_ONCE(engine->timeline->last_submitted_seqno);
> +	return READ_ONCE(engine->timeline->seqno);
>  }
>
>  int init_workarounds_ring(struct intel_engine_cs *engine);
> @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
>
>  void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
>  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
> -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
> +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
>
>  static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
>  {
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-22 11:46 ` [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
@ 2017-02-22 12:29   ` Tvrtko Ursulin
  2017-02-22 12:45     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 12:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> A request is assigned a global seqno only when it is on the hardware
> execution queue. The global seqno can be used to maintain a list of
> requests on the same engine in retirement order, for example for
> constructing a priority queue for waiting. Prior to its execution, or
> if it is subsequently removed in the event of preemption, its global
> seqno is zero. As both insertion and removal from the execution queue
> may operate in IRQ context, it is not guarded by the usual struct_mutex
> BKL. Instead those relying on the global seqno must be prepared for its
> value to change between reads. Only when the request is complete can
> the global seqno be stable (due to the memory barriers on submitting
> the commands to the hardware to write the breadcrumb, if the HWS shows
> that it has passed the global seqno and the global seqno is unchanged
> after the read, it is indeed complete).

Missed some questions I've raised on this one in the previous round.

I never got round re-reading it if you were waiting for that by any chance.

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          | 16 ++++++--
>  drivers/gpu/drm/i915/i915_gem.c          | 16 +++++---
>  drivers/gpu/drm/i915/i915_gem_request.c  | 46 ++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_request.h  | 66 +++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++++--
>  5 files changed, 114 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 994929660027..3e4feeaeef60 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4022,14 +4022,24 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>  }
>
>  static inline bool
> -__i915_request_irq_complete(struct drm_i915_gem_request *req)
> +__i915_request_irq_complete(const struct drm_i915_gem_request *req)
>  {
>  	struct intel_engine_cs *engine = req->engine;
> +	u32 seqno = i915_gem_request_global_seqno(req);
> +
> +	/* The request was dequeued before we were awoken. We check after
> +	 * inspecting the hw to confirm that this was the same request
> +	 * that generated the HWS update. The memory barriers within
> +	 * the request execution are sufficient to ensure that a check
> +	 * after reading the value from hw matches this request.
> +	 */
> +	if (!seqno)
> +		return false;
>
>  	/* Before we do the heavier coherent read of the seqno,
>  	 * check the value (hopefully) in the CPU cacheline.
>  	 */
> -	if (__i915_gem_request_completed(req))
> +	if (__i915_gem_request_completed(req, seqno))
>  		return true;
>
>  	/* Ensure our read of the seqno is coherent so that we
> @@ -4080,7 +4090,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
>  			wake_up_process(tsk);
>  		rcu_read_unlock();
>
> -		if (__i915_gem_request_completed(req))
> +		if (__i915_gem_request_completed(req, seqno))
>  			return true;
>  	}
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fce2cec8e665..f950cedb6516 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -397,7 +397,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>  	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
>  		i915_gem_request_retire_upto(rq);
>
> -	if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
> +	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
>  		/* The GPU is now idle and this client has stalled.
>  		 * Since no other client has submitted a request in the
>  		 * meantime, assume that this client is the only one
> @@ -2609,7 +2609,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
>  struct drm_i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_request *request;
> +	struct drm_i915_gem_request *request, *active = NULL;
> +	unsigned long flags;
>
>  	/* We are called by the error capture and reset at a random
>  	 * point in time. In particular, note that neither is crucially
> @@ -2619,17 +2620,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>  	 * extra delay for a recent interrupt is pointless. Hence, we do
>  	 * not need an engine->irq_seqno_barrier() before the seqno reads.
>  	 */
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
>  	list_for_each_entry(request, &engine->timeline->requests, link) {
> -		if (__i915_gem_request_completed(request))
> +		if (__i915_gem_request_completed(request,
> +						 request->global_seqno))
>  			continue;
>
>  		GEM_BUG_ON(request->engine != engine);
>  		GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>  				    &request->fence.flags));
> -		return request;
> +
> +		active = request;
> +		break;
>  	}
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>
> -	return NULL;
> +	return active;
>  }
>
>  static bool engine_stalled(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 477e8fc125ce..d18f450977e0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
>  		intel_engine_enable_signaling(request);
>  	spin_unlock(&request->lock);
>
> -	GEM_BUG_ON(!request->global_seqno);
>  	engine->emit_breadcrumb(request,
>  				request->ring->vaddr + request->postfix);
>
> @@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	/* Move the oldest request to the slab-cache (if not in use!) */
>  	req = list_first_entry_or_null(&engine->timeline->requests,
>  				       typeof(*req), link);
> -	if (req && __i915_gem_request_completed(req))
> +	if (req && i915_gem_request_completed(req))
>  		i915_gem_request_retire(req);
>
>  	/* Beware: Dragons be flying overhead.
> @@ -611,6 +610,7 @@ static int
>  i915_gem_request_await_request(struct drm_i915_gem_request *to,
>  			       struct drm_i915_gem_request *from)
>  {
> +	u32 seqno;
>  	int ret;
>
>  	GEM_BUG_ON(to == from);
> @@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>  		return ret < 0 ? ret : 0;
>  	}
>
> -	if (!from->global_seqno) {
> +	seqno = i915_gem_request_global_seqno(from);
> +	if (!seqno) {
>  		ret = i915_sw_fence_await_dma_fence(&to->submit,
>  						    &from->fence, 0,
>  						    GFP_KERNEL);
>  		return ret < 0 ? ret : 0;
>  	}
>
> -	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
> +	if (seqno <= to->timeline->sync_seqno[from->engine->id])
>  		return 0;
>
>  	trace_i915_gem_ring_sync_to(to, from);
> @@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>  			return ret;
>  	}
>
> -	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
> +	to->timeline->sync_seqno[from->engine->id] = seqno;
>  	return 0;
>  }
>
> @@ -939,7 +940,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
>  }
>
>  bool __i915_spin_request(const struct drm_i915_gem_request *req,
> -			 int state, unsigned long timeout_us)
> +			 u32 seqno, int state, unsigned long timeout_us)
>  {
>  	struct intel_engine_cs *engine = req->engine;
>  	unsigned int irq, cpu;
> @@ -957,7 +958,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
>  	irq = atomic_read(&engine->irq_count);
>  	timeout_us += local_clock_us(&cpu);
>  	do {
> -		if (__i915_gem_request_completed(req))
> +		if (seqno != i915_gem_request_global_seqno(req))
> +			break;
> +
> +		if (i915_seqno_passed(intel_engine_get_seqno(req->engine),
> +				      seqno))
>  			return true;
>
>  		/* Seqno are meant to be ordered *before* the interrupt. If
> @@ -1029,11 +1034,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	if (flags & I915_WAIT_LOCKED)
>  		add_wait_queue(errq, &reset);
>
> +	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
> +
>  	reset_wait_queue(&req->execute, &exec);
> -	if (!req->global_seqno) {
> +	if (!wait.seqno) {
>  		do {
>  			set_current_state(state);
> -			if (req->global_seqno)
> +
> +			wait.seqno = i915_gem_request_global_seqno(req);
> +			if (wait.seqno)
>  				break;
>
>  			if (flags & I915_WAIT_LOCKED &&
> @@ -1061,7 +1070,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		if (timeout < 0)
>  			goto complete;
>
> -		GEM_BUG_ON(!req->global_seqno);
> +		GEM_BUG_ON(!wait.seqno);
>  	}
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
>
> @@ -1070,7 +1079,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		goto complete;
>
>  	set_current_state(state);
> -	intel_wait_init(&wait, req->global_seqno);
>  	if (intel_engine_add_wait(req->engine, &wait))
>  		/* In order to check that we haven't missed the interrupt
>  		 * as we enabled it, we need to kick ourselves to do a
> @@ -1091,7 +1099,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  		timeout = io_schedule_timeout(timeout);
>
> -		if (intel_wait_complete(&wait))
> +		if (intel_wait_complete(&wait) &&
> +		    i915_gem_request_global_seqno(req) == wait.seqno)
>  			break;
>
>  		set_current_state(state);
> @@ -1142,14 +1151,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  static void engine_retire_requests(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_gem_request *request, *next;
> +	u32 seqno = intel_engine_get_seqno(engine);
> +	LIST_HEAD(retire);
>
> +	spin_lock_irq(&engine->timeline->lock);
>  	list_for_each_entry_safe(request, next,
>  				 &engine->timeline->requests, link) {
> -		if (!__i915_gem_request_completed(request))
> -			return;
> +		if (!i915_seqno_passed(seqno, request->global_seqno))
> +			break;
>
> -		i915_gem_request_retire(request);
> +		list_move_tail(&request->link, &retire);
>  	}
> +	spin_unlock_irq(&engine->timeline->lock);
> +
> +	list_for_each_entry_safe(request, next, &retire, link)
> +		i915_gem_request_retire(request);
>  }
>
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 467d3e13fce0..b81f6709905c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -135,6 +135,11 @@ struct drm_i915_gem_request {
>  	struct i915_priotree priotree;
>  	struct i915_dependency dep;
>
> +	/** GEM sequence number associated with this request on the
> +	 * global execution timeline. It is zero when the request is not
> +	 * on the HW queue (i.e. not on the engine timeline list).
> +	 * Its value is guarded by the timeline spinlock.
> +	 */
>  	u32 global_seqno;
>
>  	/** Position in the ring of the start of the request */
> @@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
>  	*pdst = src;
>  }
>
> +/**
> + * i915_gem_request_global_seqno - report the current global seqno
> + * @request - the request
> + *
> + * A request is assigned a global seqno only when it is on the hardware
> + * execution queue. The global seqno can be used to maintain a list of
> + * requests on the same engine in retirement order, for example for
> + * constructing a priority queue for waiting. Prior to its execution, or
> + * if it is subsequently removed in the event of preemption, its global
> + * seqno is zero. As both insertion and removal from the execution queue
> + * may operate in IRQ context, it is not guarded by the usual struct_mutex
> + * BKL. Instead those relying on the global seqno must be prepared for its
> + * value to change between reads. Only when the request is complete can
> + * the global seqno be stable (due to the memory barriers on submitting
> + * the commands to the hardware to write the breadcrumb, if the HWS shows
> + * that it has passed the global seqno and the global seqno is unchanged
> + * after the read, it is indeed complete).
> + */
> +static u32
> +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
> +{
> +	return READ_ONCE(request->global_seqno);
> +}
> +
>  int
>  i915_gem_request_await_object(struct drm_i915_gem_request *to,
>  			      struct drm_i915_gem_object *obj,
> @@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
>  }
>
>  static inline bool
> -__i915_gem_request_started(const struct drm_i915_gem_request *req)
> +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
>  {
> -	GEM_BUG_ON(!req->global_seqno);
> +	GEM_BUG_ON(!seqno);
>  	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->global_seqno - 1);
> +				 seqno - 1);
>  }
>
>  static inline bool
>  i915_gem_request_started(const struct drm_i915_gem_request *req)
>  {
> -	if (!req->global_seqno)
> +	u32 seqno;
> +
> +	seqno = i915_gem_request_global_seqno(req);
> +	if (!seqno)
>  		return false;
>
> -	return __i915_gem_request_started(req);
> +	return __i915_gem_request_started(req, seqno);
>  }
>
>  static inline bool
> -__i915_gem_request_completed(const struct drm_i915_gem_request *req)
> +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
>  {
> -	GEM_BUG_ON(!req->global_seqno);
> -	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> -				 req->global_seqno);
> +	GEM_BUG_ON(!seqno);
> +	return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
> +		seqno == i915_gem_request_global_seqno(req);
>  }
>
>  static inline bool
>  i915_gem_request_completed(const struct drm_i915_gem_request *req)
>  {
> -	if (!req->global_seqno)
> +	u32 seqno;
> +
> +	seqno = i915_gem_request_global_seqno(req);
> +	if (!seqno)
>  		return false;
>
> -	return __i915_gem_request_completed(req);
> +	return __i915_gem_request_completed(req, seqno);
>  }
>
>  bool __i915_spin_request(const struct drm_i915_gem_request *request,
> -			 int state, unsigned long timeout_us);
> +			 u32 seqno, int state, unsigned long timeout_us);
>  static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
>  				     int state, unsigned long timeout_us)
>  {
> -	return (__i915_gem_request_started(request) &&
> -		__i915_spin_request(request, state, timeout_us));
> +	u32 seqno;
> +
> +	seqno = i915_gem_request_global_seqno(request);
> +	return (__i915_gem_request_started(request, seqno) &&
> +		__i915_spin_request(request, seqno, state, timeout_us));
>  }
>
>  /* We treat requests as fences. This is not be to confused with our
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 4f4e703d1b14..d5bf4c0b2deb 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
>  	struct rb_node *parent, **p;
>  	bool first, wakeup;
> +	u32 seqno;
>
>  	/* Note that we may be called from an interrupt handler on another
>  	 * device (e.g. nouveau signaling a fence completion causing us
> @@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>
>  	/* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
>  	assert_spin_locked(&request->lock);
> -	if (!request->global_seqno)
> +
> +	seqno = i915_gem_request_global_seqno(request);
> +	if (!seqno)
>  		return;
>
>  	request->signaling.wait.tsk = b->signaler;
> -	request->signaling.wait.seqno = request->global_seqno;
> +	request->signaling.wait.seqno = seqno;
>  	i915_gem_request_get(request);
>
>  	spin_lock(&b->lock);
> @@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  	p = &b->signals.rb_node;
>  	while (*p) {
>  		parent = *p;
> -		if (i915_seqno_passed(request->global_seqno,
> -				      to_signaler(parent)->global_seqno)) {
> +		if (i915_seqno_passed(seqno,
> +				      to_signaler(parent)->signaling.wait.seqno)) {
>  			p = &parent->rb_right;
>  			first = false;
>  		} else {
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 08/15] drm/i915: Take a reference whilst processing the signaler request
  2017-02-22 11:46 ` [PATCH v2 08/15] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
@ 2017-02-22 12:35   ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 12:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> The plan in the near-future is to allow requests to be removed from the
> signaler. We can no longer then rely on holding a reference to the
> request for the duration it is in the signaling tree, and instead must
> obtain a reference to the request for the current operation using RCU.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 24 ++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  2 +-
>  2 files changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index d5bf4c0b2deb..62e6b8181200 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -496,7 +496,11 @@ static int intel_breadcrumbs_signaler(void *arg)
>  		 * need to wait for a new interrupt from the GPU or for
>  		 * a new client.
>  		 */
> -		request = READ_ONCE(b->first_signal);
> +		rcu_read_lock();
> +		request = rcu_dereference(b->first_signal);
> +		if (request)
> +			request = i915_gem_request_get_rcu(request);
> +		rcu_read_unlock();
>  		if (signal_complete(request)) {
>  			local_bh_disable();
>  			dma_fence_signal(&request->fence);
> @@ -515,24 +519,28 @@ static int intel_breadcrumbs_signaler(void *arg)
>  			 * the oldest before picking the next one.
>  			 */
>  			spin_lock_irq(&b->lock);
> -			if (request == b->first_signal) {
> +			if (request == rcu_access_pointer(b->first_signal)) {
>  				struct rb_node *rb =
>  					rb_next(&request->signaling.node);
> -				b->first_signal = rb ? to_signaler(rb) : NULL;
> +				rcu_assign_pointer(b->first_signal,
> +						   rb ? to_signaler(rb) : NULL);
>  			}
>  			rb_erase(&request->signaling.node, &b->signals);
>  			spin_unlock_irq(&b->lock);
>
>  			i915_gem_request_put(request);
>  		} else {
> -			if (kthread_should_stop())
> +			if (kthread_should_stop()) {
> +				GEM_BUG_ON(request);
>  				break;
> +			}
>
>  			schedule();
>
>  			if (kthread_should_park())
>  				kthread_parkme();
>  		}
> +		i915_gem_request_put(request);
>  	} while (1);
>  	__set_current_state(TASK_RUNNING);
>
> @@ -597,7 +605,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  	rb_link_node(&request->signaling.node, parent, p);
>  	rb_insert_color(&request->signaling.node, &b->signals);
>  	if (first)
> -		smp_store_mb(b->first_signal, request);
> +		rcu_assign_pointer(b->first_signal, request);
>
>  	spin_unlock(&b->lock);
>
> @@ -670,7 +678,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
>  	/* The engines should be idle and all requests accounted for! */
>  	WARN_ON(READ_ONCE(b->first_wait));
>  	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
> -	WARN_ON(READ_ONCE(b->first_signal));
> +	WARN_ON(rcu_access_pointer(b->first_signal));
>  	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
>
>  	if (!IS_ERR_OR_NULL(b->signaler))
> @@ -686,8 +694,8 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
>
>  	spin_lock_irq(&b->lock);
>
> -	if (b->first_wait) {
> -		wake_up_process(b->first_wait->tsk);
> +	if (rcu_access_pointer(b->first_signal)) {
> +		wake_up_process(b->signaler);
>  		busy |= intel_engine_flag(engine);
>  	}
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index b91c2c7ef5a6..3fcb9dd19b07 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -242,7 +242,7 @@ struct intel_engine_cs {
>  		struct rb_root signals; /* sorted by retirement */
>  		struct intel_wait *first_wait; /* oldest waiter by retirement */
>  		struct task_struct *signaler; /* used for fence signalling */
> -		struct drm_i915_gem_request *first_signal;
> +		struct drm_i915_gem_request __rcu *first_signal;
>  		struct timer_list fake_irq; /* used after a missed interrupt */
>  		struct timer_list hangcheck; /* detect missed interrupts */
>
>

Looks OK.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-22 12:29   ` Tvrtko Ursulin
@ 2017-02-22 12:45     ` Chris Wilson
  2017-02-22 13:13       ` Tvrtko Ursulin
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 12:45 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Feb 22, 2017 at 12:29:00PM +0000, Tvrtko Ursulin wrote:
> 
> On 22/02/2017 11:46, Chris Wilson wrote:
> >A request is assigned a global seqno only when it is on the hardware
> >execution queue. The global seqno can be used to maintain a list of
> >requests on the same engine in retirement order, for example for
> >constructing a priority queue for waiting. Prior to its execution, or
> >if it is subsequently removed in the event of preemption, its global
> >seqno is zero. As both insertion and removal from the execution queue
> >may operate in IRQ context, it is not guarded by the usual struct_mutex
> >BKL. Instead those relying on the global seqno must be prepared for its
> >value to change between reads. Only when the request is complete can
> >the global seqno be stable (due to the memory barriers on submitting
> >the commands to the hardware to write the breadcrumb, if the HWS shows
> >that it has passed the global seqno and the global seqno is unchanged
> >after the read, it is indeed complete).
> 
> Missed some questions I've raised on this one in the previous round.

Just missed it when scanning for opens from the threads.
 
> I never got round to re-reading it, if you were waiting for that by any chance.

Looks like it is one you have to read after seeing where the series
goes.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 09/15] drm/i915: Allow an request to be cancelled
  2017-02-22 11:46 ` [PATCH v2 09/15] drm/i915: Allow an request to be cancelled Chris Wilson
@ 2017-02-22 13:08   ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 13:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> If we preempt a request and remove it from the execution queue, we need
> to undo its global seqno and restart any waiters.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 71 +++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
>  2 files changed, 57 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 62e6b8181200..882e601ebb09 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -356,22 +356,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b,
>  		return tsk->prio;
>  }
>
> -void intel_engine_remove_wait(struct intel_engine_cs *engine,
> -			      struct intel_wait *wait)
> +static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
> +				       struct intel_wait *wait)
>  {
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> -	/* Quick check to see if this waiter was already decoupled from
> -	 * the tree by the bottom-half to avoid contention on the spinlock
> -	 * by the herd.
> -	 */
> -	if (RB_EMPTY_NODE(&wait->node))
> -		return;
> -
> -	spin_lock_irq(&b->lock);
> +	assert_spin_locked(&b->lock);
>
>  	if (RB_EMPTY_NODE(&wait->node))
> -		goto out_unlock;
> +		goto out;
>
>  	if (b->first_wait == wait) {
>  		const int priority = wakeup_priority(b, wait->tsk);
> @@ -436,11 +429,27 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
>  	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
>  	rb_erase(&wait->node, &b->waiters);
>
> -out_unlock:
> +out:
>  	GEM_BUG_ON(b->first_wait == wait);
>  	GEM_BUG_ON(rb_first(&b->waiters) !=
>  		   (b->first_wait ? &b->first_wait->node : NULL));
>  	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
> +}
> +
> +void intel_engine_remove_wait(struct intel_engine_cs *engine,
> +			      struct intel_wait *wait)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	/* Quick check to see if this waiter was already decoupled from
> +	 * the tree by the bottom-half to avoid contention on the spinlock
> +	 * by the herd.
> +	 */
> +	if (RB_EMPTY_NODE(&wait->node))
> +		return;
> +
> +	spin_lock_irq(&b->lock);
> +	__intel_engine_remove_wait(engine, wait);
>  	spin_unlock_irq(&b->lock);
>  }
>
> @@ -506,11 +515,13 @@ static int intel_breadcrumbs_signaler(void *arg)
>  			dma_fence_signal(&request->fence);
>  			local_bh_enable(); /* kick start the tasklets */
>
> +			spin_lock_irq(&b->lock);
> +
>  			/* Wake up all other completed waiters and select the
>  			 * next bottom-half for the next user interrupt.
>  			 */
> -			intel_engine_remove_wait(engine,
> -						 &request->signaling.wait);
> +			__intel_engine_remove_wait(engine,
> +						   &request->signaling.wait);
>
>  			/* Find the next oldest signal. Note that as we have
>  			 * not been holding the lock, another client may
> @@ -518,7 +529,6 @@ static int intel_breadcrumbs_signaler(void *arg)
>  			 * we just completed - so double check we are still
>  			 * the oldest before picking the next one.
>  			 */
> -			spin_lock_irq(&b->lock);
>  			if (request == rcu_access_pointer(b->first_signal)) {
>  				struct rb_node *rb =
>  					rb_next(&request->signaling.node);
> @@ -526,6 +536,8 @@ static int intel_breadcrumbs_signaler(void *arg)
>  						   rb ? to_signaler(rb) : NULL);
>  			}
>  			rb_erase(&request->signaling.node, &b->signals);
> +			RB_CLEAR_NODE(&request->signaling.node);
> +
>  			spin_unlock_irq(&b->lock);
>
>  			i915_gem_request_put(request);
> @@ -613,6 +625,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
>  		wake_up_process(b->signaler);
>  }
>
> +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> +{
> +	struct intel_engine_cs *engine = request->engine;
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	assert_spin_locked(&request->lock);
> +	GEM_BUG_ON(!request->signaling.wait.seqno);
> +
> +	spin_lock(&b->lock);
> +
> +	if (!RB_EMPTY_NODE(&request->signaling.node)) {
> +		if (request == rcu_access_pointer(b->first_signal)) {
> +			struct rb_node *rb =
> +				rb_next(&request->signaling.node);
> +			rcu_assign_pointer(b->first_signal,
> +					   rb ? to_signaler(rb) : NULL);
> +		}
> +		rb_erase(&request->signaling.node, &b->signals);
> +		RB_CLEAR_NODE(&request->signaling.node);
> +	}
> +
> +	__intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> +	spin_unlock(&b->lock);
> +
> +	request->signaling.wait.seqno = 0;
> +	i915_gem_request_put(request);
> +}
> +
>  int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
>  {
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 3fcb9dd19b07..45d2c2fa946e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -598,6 +598,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
>  void intel_engine_remove_wait(struct intel_engine_cs *engine,
>  			      struct intel_wait *wait);
>  void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
> +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
>
>  static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
>  {
>

I peeked ahead a bit to see how it will be used and it looks fine.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock
  2017-02-22 12:45     ` Chris Wilson
@ 2017-02-22 13:13       ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 13:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 12:45, Chris Wilson wrote:
> On Wed, Feb 22, 2017 at 12:29:00PM +0000, Tvrtko Ursulin wrote:
>>
>> On 22/02/2017 11:46, Chris Wilson wrote:
>>> A request is assigned a global seqno only when it is on the hardware
>>> execution queue. The global seqno can be used to maintain a list of
>>> requests on the same engine in retirement order, for example for
>>> constructing a priority queue for waiting. Prior to its execution, or
>>> if it is subsequently removed in the event of preemption, its global
>>> seqno is zero. As both insertion and removal from the execution queue
>>> may operate in IRQ context, it is not guarded by the usual struct_mutex
>>> BKL. Instead those relying on the global seqno must be prepared for its
>>> value to change between reads. Only when the request is complete can
>>> the global seqno be stable (due to the memory barriers on submitting
>>> the commands to the hardware to write the breadcrumb, if the HWS shows
>>> that it has passed the global seqno and the global seqno is unchanged
>>> after the read, it is indeed complete).
>>
>> Missed some questions I've raised on this one in the previous round.
>
> Just missed it when scanning for opens from the threads.
>
>> I never got round re-reading it if you were waiting for that by any chance.
>
> Looks like it is one you have to read after seeing where the series
> goes.

Yes I did not peek ahead when reading it the first time. Looks fine.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (14 preceding siblings ...)
  2017-02-22 11:46 ` [PATCH v2 15/15] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
@ 2017-02-22 13:22 ` Patchwork
  2017-02-22 20:52 ` ✗ Fi.CI.BAT: warning for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2) Patchwork
  16 siblings, 0 replies; 39+ messages in thread
From: Patchwork @ 2017-02-22 13:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine
URL   : https://patchwork.freedesktop.org/series/20056/
State : failure

== Summary ==

Series 20056v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/20056/revisions/1/mbox/

Test gem_exec_store:
        Subgroup basic-bsd2:
                skip       -> INCOMPLETE (fi-byt-j1900)

fi-bdw-5557u     total:253  pass:242  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:253  pass:214  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:253  pass:234  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:78   pass:67   dwarn:0   dfail:0   fail:0   skip:10 
fi-byt-n2820     total:253  pass:222  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:253  pass:237  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:253  pass:203  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:253  pass:235  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:253  pass:236  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:253  pass:231  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:253  pass:243  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:253  pass:225  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:253  pass:224  dwarn:0   dfail:0   fail:0   skip:29 

2362c155a028031f2686044863203d4825ace1e7 drm-tip: 2017y-02m-22d-10h-33m-14s UTC integration manifest
9858fa2 drm/i915: Remove one level of indention from wait-for-execute
9cd4e6f drm/i915: Immediately process a reset before starting waiting
824d609 drm/i915: Refactor direct GPU reset from request waiters
c9793ad drm/i915: Replace reset_wait_queue with default_wake_function
2c10ac8 drm/i915: Exercise request cancellation using a mock selftest
e3222f0 drm/i915: Remove the preempted request from the execution queue
e0e9788 drm/i915: Allow an request to be cancelled
fb33716 drm/i915: Take a reference whilst processing the signaler request
7298999 drm/i915: Protect the request->global_seqno with the engine->timeline lock
dd5632c drm/i915: Deconstruct execute fence
4dd11c5 drm/i915: Inline __i915_gem_request_wait_for_execute()
73c675e drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
3ec0452 drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
16b1b01 drm/i915: Move reeerve_seqno() next to unreserve_seqno()
0015ef2 drm/i915: Keep a global seqno per-engine

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3927/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue
  2017-02-22 11:46 ` [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue Chris Wilson
@ 2017-02-22 13:33   ` Tvrtko Ursulin
  2017-02-22 13:40     ` Chris Wilson
  2017-02-22 18:53     ` [PATCH v3] " Chris Wilson
  0 siblings, 2 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 13:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> After the request is cancelled, we then need to remove it from the
> global execution timeline and return it to the context timeline, the
> inverse of submit_request().
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c            | 58 +++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_request.h            |  3 ++
>  drivers/gpu/drm/i915/intel_breadcrumbs.c           | 19 ++++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h            |  6 ---
>  drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c |  6 +++
>  5 files changed, 83 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index d18f450977e0..97116e492d01 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
>  	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>  }
>
> +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
> +{
> +	struct intel_engine_cs *engine = request->engine;
> +	struct intel_timeline *timeline;
> +
> +	assert_spin_locked(&engine->timeline->lock);
> +
> +	/* Only unwind in reverse order, required so that the per-context list
> +	 * is kept in seqno/ring order.
> +	 */
> +	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
> +	engine->timeline->seqno--;
> +
> +	/* We may be recursing from the signal callback of another i915 fence */

Is this a copy-paste of the comment, or will there really be preemption
triggered from the signal callback?

> +	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> +	request->global_seqno = 0;
> +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> +		intel_engine_cancel_signaling(request);
> +	spin_unlock(&request->lock);
> +
> +	/* Transfer back from the global per-engine timeline to per-context */
> +	timeline = request->timeline;
> +	GEM_BUG_ON(timeline == engine->timeline);
> +
> +	spin_lock(&timeline->lock);
> +	list_move(&request->link, &timeline->requests);
> +	spin_unlock(&timeline->lock);
> +
> +	/* We don't need to wake_up any waiters on request->execute, they
> +	 * will get woken by any other event or us re-adding this request
> +	 * to the engine timeline (__i915_gem_request_submit()). The waiters
> +	 * should be quite adapt at finding that the request now has a new
> +	 * global_seqno to the one they went to sleep on.
> +	 */
> +}
> +
> +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
> +{
> +	struct intel_engine_cs *engine = request->engine;
> +	unsigned long flags;
> +
> +	/* Will be called from irq-context when using foreign fences. */
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
> +
> +	__i915_gem_request_unsubmit(request);
> +
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +}
> +
>  static int __i915_sw_fence_call
>  submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>  {
> @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	if (flags & I915_WAIT_LOCKED)
>  		add_wait_queue(errq, &reset);
>
> -	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
> +	wait.tsk = current;
>
> +restart:
>  	reset_wait_queue(&req->execute, &exec);
> +	wait.seqno = i915_gem_request_global_seqno(req);

Not sure if it is worth dropping intel_wait_init, I presume to avoid 
assigning the task twice? It will still be the same task so just moving 
the intel_wait_init here would be clearer.

>  	if (!wait.seqno) {
>  		do {
>  			set_current_state(state);
> @@ -1135,6 +1186,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		/* Only spin if we know the GPU is processing this request */
>  		if (i915_spin_request(req, state, 2))
>  			break;
> +
> +		if (i915_gem_request_global_seqno(req) != wait.seqno) {
> +			intel_engine_remove_wait(req->engine, &wait);
> +			goto restart;
> +		}
>  	}
>
>  	intel_engine_remove_wait(req->engine, &wait);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index b81f6709905c..5f73d8c0a38a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
>  void __i915_gem_request_submit(struct drm_i915_gem_request *request);
>  void i915_gem_request_submit(struct drm_i915_gem_request *request);
>
> +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
> +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
> +
>  struct intel_rps_client;
>  #define NO_WAITBOOST ERR_PTR(-1)
>  #define IS_RPS_CLIENT(p) (!IS_ERR(p))
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 882e601ebb09..5bcad7872c08 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -453,7 +453,14 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
>  	spin_unlock_irq(&b->lock);
>  }
>
> -static bool signal_complete(struct drm_i915_gem_request *request)
> +static bool signal_valid(const struct drm_i915_gem_request *request)
> +{
> +	u32 seqno = READ_ONCE(request->global_seqno);
> +
> +	return seqno == request->signaling.wait.seqno;
> +}
> +
> +static bool signal_complete(const struct drm_i915_gem_request *request)
>  {
>  	if (!request)
>  		return false;
> @@ -462,7 +469,7 @@ static bool signal_complete(struct drm_i915_gem_request *request)
>  	 * signalled that this wait is already completed.
>  	 */
>  	if (intel_wait_complete(&request->signaling.wait))
> -		return true;
> +		return signal_valid(request);
>
>  	/* Carefully check if the request is complete, giving time for the
>  	 * seqno to be visible or if the GPU hung.
> @@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>
>  			i915_gem_request_put(request);
>  		} else {
> +			DEFINE_WAIT(exec);
> +
>  			if (kthread_should_stop()) {
>  				GEM_BUG_ON(request);
>  				break;
>  			}
>
> +			if (request)
> +				add_wait_queue(&request->execute, &exec);
> +
>  			schedule();
>
> +			if (request)
> +				remove_wait_queue(&request->execute, &exec);
> +

Not directly related but made me think why we are using 
TASK_INTERRUPTIBLE in the signallers? Shouldn't it be 
TASK_UNINTERRUPTIBLE and io_schedule? Sounds a bit deja vu though, maybe 
we have talked about it before..

>  			if (kthread_should_park())
>  				kthread_parkme();
>  		}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 45d2c2fa946e..97fde79167a6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -582,12 +582,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>  /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
>  int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>
> -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
> -{
> -	wait->tsk = current;
> -	wait->seqno = seqno;
> -}
> -
>  static inline bool intel_wait_complete(const struct intel_wait *wait)
>  {
>  	return RB_EMPTY_NODE(&wait->node);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
> index 6426acc9fdca..62c020c7ea80 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
> @@ -28,6 +28,12 @@
>  #include "mock_gem_device.h"
>  #include "mock_engine.h"
>
> +static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
> +{
> +	wait->tsk = current;
> +	wait->seqno = seqno;
> +}
> +
>  static int check_rbtree(struct intel_engine_cs *engine,
>  			const unsigned long *bitmap,
>  			const struct intel_wait *waiters,
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue
  2017-02-22 13:33   ` Tvrtko Ursulin
@ 2017-02-22 13:40     ` Chris Wilson
  2017-02-22 13:50       ` Tvrtko Ursulin
  2017-02-22 18:53     ` [PATCH v3] " Chris Wilson
  1 sibling, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 13:40 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Feb 22, 2017 at 01:33:22PM +0000, Tvrtko Ursulin wrote:
> 
> On 22/02/2017 11:46, Chris Wilson wrote:
> >+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
> >+{
> >+	struct intel_engine_cs *engine = request->engine;
> >+	struct intel_timeline *timeline;
> >+
> >+	assert_spin_locked(&engine->timeline->lock);
> >+
> >+	/* Only unwind in reverse order, required so that the per-context list
> >+	 * is kept in seqno/ring order.
> >+	 */
> >+	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
> >+	engine->timeline->seqno--;
> >+
> >+	/* We may be recursing from the signal callback of another i915 fence */
> 
> Is this a copy-paste of the comment, or will there really be preemption
> triggered from the signal callback?

I believe it may be. Say an RCS request was waiting on a BCS request,
and we decide to preempt, and can do so immediately. I think being
prepared for the same recursion here is prudent.

> > static int __i915_sw_fence_call
> > submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> > {
> >@@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> > 	if (flags & I915_WAIT_LOCKED)
> > 		add_wait_queue(errq, &reset);
> >
> >-	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
> >+	wait.tsk = current;
> >
> >+restart:
> > 	reset_wait_queue(&req->execute, &exec);
> >+	wait.seqno = i915_gem_request_global_seqno(req);
> 
> Not sure if it is worth dropping intel_wait_init, I presume to avoid
> assigning the task twice? It will still be the same task so just
> moving the intel_wait_init here would be clearer.

I was thinking the opposite: since we are looking at wait.seqno directly
elsewhere, I wanted that to be clear. And current is in a special
register, so why pay the cost to reload it onto the stack :)

> >@@ -542,13 +549,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> >
> > 			i915_gem_request_put(request);
> > 		} else {
> >+			DEFINE_WAIT(exec);
> >+
> > 			if (kthread_should_stop()) {
> > 				GEM_BUG_ON(request);
> > 				break;
> > 			}
> >
> >+			if (request)
> >+				add_wait_queue(&request->execute, &exec);
> >+
> > 			schedule();
> >
> >+			if (request)
> >+				remove_wait_queue(&request->execute, &exec);
> >+
> 
> Not directly related but made me think why we are using
> TASK_INTERRUPTIBLE in the signallers? Shouldn't it be
> TASK_UNINTERRUPTIBLE and io_schedule? Sounds a bit deja vu though,
> maybe we have talked about it before..

It doesn't make any difference to the signalers as they are kthreads
and shouldn't be interrupted - but it does make a difference to the
reported load as TASK_UNINTERRUPTIBLE contribute to system load.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-22 11:46 ` [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
@ 2017-02-22 13:46   ` Tvrtko Ursulin
  2017-02-22 13:59     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 13:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> Add a mock selftest to preempt a request and check that we cancel it,
> requeue the request and then complete its execution.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/selftests/i915_gem_request.c | 59 +++++++++++++++++++++++
>  drivers/gpu/drm/i915/selftests/mock_request.c     | 19 ++++++++
>  drivers/gpu/drm/i915/selftests/mock_request.h     |  2 +
>  3 files changed, 80 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> index 9d056a86723d..c803ef60a127 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> @@ -26,6 +26,7 @@
>
>  #include "../i915_selftest.h"
>
> +#include "mock_context.h"
>  #include "mock_gem_device.h"
>
>  static int igt_add_request(void *arg)
> @@ -181,12 +182,70 @@ static int igt_fence_wait(void *arg)
>  	return err;
>  }
>
> +static int igt_request_rewind(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct drm_i915_gem_request *request, *vip;
> +	struct i915_gem_context *ctx[2];
> +	int err = -EINVAL;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	ctx[0] = mock_context(i915, "A");
> +	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
> +	if (!request) {
> +		err = -ENOMEM;
> +		goto err_device;

Leaks ctx[0].

> +	}
> +	i915_add_request(request);
> +
> +	ctx[1] = mock_context(i915, "B");
> +	vip = mock_request(i915->engine[RCS], ctx[1], 0);
> +	if (!vip) {
> +		err = -ENOMEM;
> +		goto err_locked;

Leaks the request.

> +	}
> +
> +	/* Simulate preemption by manual reordering */
> +	if (!mock_cancel_request(request)) {
> +		pr_err("failed to cancel request (already executed)!\n");
> +		i915_add_request(vip);
> +		goto err_locked;
> +	}
> +	i915_add_request(vip);
> +	request->engine->submit_request(request);
> +
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	if (i915_wait_request(vip, 0, HZ) == -ETIME) {
> +		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
> +		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
> +		goto err;
> +	}
> +
> +	if (i915_gem_request_completed(request)) {
> +		pr_err("low priority request already completed\n");
> +		goto err;
> +	}
> +
> +	err = 0;
> +err:
> +	mutex_lock(&i915->drm.struct_mutex);
> +err_locked:
> +	mock_context_close(ctx[1]);
> +	mock_context_close(ctx[0]);
> +err_device:
> +	mock_device_flush(i915);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>  int i915_gem_request_mock_selftests(void)
>  {
>  	static const struct i915_subtest tests[] = {
>  		SUBTEST(igt_add_request),
>  		SUBTEST(igt_wait_request),
>  		SUBTEST(igt_fence_wait),
> +		SUBTEST(igt_request_rewind),
>  	};
>  	struct drm_i915_private *i915;
>  	int err;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
> index e23242d1b88a..0e8d2e7f8c70 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_request.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_request.c
> @@ -22,6 +22,7 @@
>   *
>   */
>
> +#include "mock_engine.h"
>  #include "mock_request.h"
>
>  struct drm_i915_gem_request *
> @@ -42,3 +43,21 @@ mock_request(struct intel_engine_cs *engine,
>
>  	return &mock->base;
>  }
> +
> +bool mock_cancel_request(struct drm_i915_gem_request *request)
> +{
> +	struct mock_request *mock = container_of(request, typeof(*mock), base);
> +	struct mock_engine *engine =
> +		container_of(request->engine, typeof(*engine), base);
> +	bool was_queued;
> +
> +	spin_lock_irq(&engine->hw_lock);
> +	was_queued = !list_empty(&mock->link);

I suggest a better name for the mock request. Even just req would be 
better IMO.

> +	list_del_init(&mock->link);
> +	spin_unlock_irq(&engine->hw_lock);
> +
> +	if (was_queued)
> +		i915_gem_request_unsubmit(request);
> +
> +	return was_queued;
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
> index cc76d4f4eb4e..4dea74c8e96d 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_request.h
> +++ b/drivers/gpu/drm/i915/selftests/mock_request.h
> @@ -41,4 +41,6 @@ mock_request(struct intel_engine_cs *engine,
>  	     struct i915_gem_context *context,
>  	     unsigned long delay);
>
> +bool mock_cancel_request(struct drm_i915_gem_request *request);
> +
>  #endif /* !__MOCK_REQUEST__ */
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue
  2017-02-22 13:40     ` Chris Wilson
@ 2017-02-22 13:50       ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 13:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 13:40, Chris Wilson wrote:
> On Wed, Feb 22, 2017 at 01:33:22PM +0000, Tvrtko Ursulin wrote:
>>
>> On 22/02/2017 11:46, Chris Wilson wrote:
>>> +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
>>> +{
>>> +	struct intel_engine_cs *engine = request->engine;
>>> +	struct intel_timeline *timeline;
>>> +
>>> +	assert_spin_locked(&engine->timeline->lock);
>>> +
>>> +	/* Only unwind in reverse order, required so that the per-context list
>>> +	 * is kept in seqno/ring order.
>>> +	 */
>>> +	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
>>> +	engine->timeline->seqno--;
>>> +
>>> +	/* We may be recursing from the signal callback of another i915 fence */
>>
>> Copy-paste of the comment, or will there really be preemption
>> triggered from the signal callback?
>
> I believe it may be. Say an RCS request was waiting on a BCS request,
> and we decide to preempt, and can do so immediately. I think being
> prepared for the same recursion here is prudent.

Yeah OK, just wasn't sure at which level will we handle preemption.

>>> static int __i915_sw_fence_call
>>> submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>>> {
>>> @@ -1034,9 +1083,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>>> 	if (flags & I915_WAIT_LOCKED)
>>> 		add_wait_queue(errq, &reset);
>>>
>>> -	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
>>> +	wait.tsk = current;
>>>
>>> +restart:
>>> 	reset_wait_queue(&req->execute, &exec);
>>> +	wait.seqno = i915_gem_request_global_seqno(req);
>>
>> Not sure if it is worth dropping intel_wait_init, I presume to avoid
>> assigning the task twice? It will still be the same task so just
>> moving the intel_wait_init here would be clearer.
>
> I was thinking the opposite, since we are looking at wait.seqno directly
> elsewhere, so wanted that to be clear. And current is in a special
> register, so why pay the cost to reload it onto stack :)

I can see that but intel_wait_init was so nice as a marker when reading 
the code.

Maybe leave it and add intel_wait_update_seqno?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-22 13:46   ` Tvrtko Ursulin
@ 2017-02-22 13:59     ` Chris Wilson
  2017-02-22 14:03       ` Chris Wilson
  2017-02-22 14:05       ` Tvrtko Ursulin
  0 siblings, 2 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 13:59 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Feb 22, 2017 at 01:46:10PM +0000, Tvrtko Ursulin wrote:
> 
> On 22/02/2017 11:46, Chris Wilson wrote:
> >Add a mock selftest to preempt a request and check that we cancel it,
> >requeue the request and then complete its execution.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> > drivers/gpu/drm/i915/selftests/i915_gem_request.c | 59 +++++++++++++++++++++++
> > drivers/gpu/drm/i915/selftests/mock_request.c     | 19 ++++++++
> > drivers/gpu/drm/i915/selftests/mock_request.h     |  2 +
> > 3 files changed, 80 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> >index 9d056a86723d..c803ef60a127 100644
> >--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
> >@@ -26,6 +26,7 @@
> >
> > #include "../i915_selftest.h"
> >
> >+#include "mock_context.h"
> > #include "mock_gem_device.h"
> >
> > static int igt_add_request(void *arg)
> >@@ -181,12 +182,70 @@ static int igt_fence_wait(void *arg)
> > 	return err;
> > }
> >
> >+static int igt_request_rewind(void *arg)
> >+{
> >+	struct drm_i915_private *i915 = arg;
> >+	struct drm_i915_gem_request *request, *vip;
> >+	struct i915_gem_context *ctx[2];
> >+	int err = -EINVAL;
> >+
> >+	mutex_lock(&i915->drm.struct_mutex);
> >+	ctx[0] = mock_context(i915, "A");
> >+	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
> >+	if (!request) {
> >+		err = -ENOMEM;
> >+		goto err_device;
> 
> Leaks ctx[0].
> 
> >+	}
> >+	i915_add_request(request);
> >+
> >+	ctx[1] = mock_context(i915, "B");
> >+	vip = mock_request(i915->engine[RCS], ctx[1], 0);
> >+	if (!vip) {
> >+		err = -ENOMEM;
> >+		goto err_locked;
> 
> Leaks the request.

That's been handed over to the device.

> >+bool mock_cancel_request(struct drm_i915_gem_request *request)
> >+{
> >+	struct mock_request *mock = container_of(request, typeof(*mock), base);
> >+	struct mock_engine *engine =
> >+		container_of(request->engine, typeof(*engine), base);
> >+	bool was_queued;
> >+
> >+	spin_lock_irq(&engine->hw_lock);
> >+	was_queued = !list_empty(&mock->link);
> 
> I suggest a better name for the mock request. Even just req would be
> better IMO.

A bit late, the pattern has been set for the file. Currently
request for drm_i915_gem_request and mock for mock_request. I definitely
don't like the idea of req and request in the same function.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-22 13:59     ` Chris Wilson
@ 2017-02-22 14:03       ` Chris Wilson
  2017-02-22 14:05       ` Tvrtko Ursulin
  1 sibling, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 14:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On Wed, Feb 22, 2017 at 01:59:13PM +0000, Chris Wilson wrote:
> On Wed, Feb 22, 2017 at 01:46:10PM +0000, Tvrtko Ursulin wrote:
> > 
> > On 22/02/2017 11:46, Chris Wilson wrote:
> > >+	ctx[1] = mock_context(i915, "B");
> > >+	vip = mock_request(i915->engine[RCS], ctx[1], 0);
> > >+	if (!vip) {
> > >+		err = -ENOMEM;
> > >+		goto err_locked;
> > 
> > Leaks the request.
> 
> That's been handed over to the device.

However, we shouldn't assume that the test harness is functioning
perfectly and the request remains valid as we poke it without a
reference of our own.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest
  2017-02-22 13:59     ` Chris Wilson
  2017-02-22 14:03       ` Chris Wilson
@ 2017-02-22 14:05       ` Tvrtko Ursulin
  1 sibling, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 14:05 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 13:59, Chris Wilson wrote:
> On Wed, Feb 22, 2017 at 01:46:10PM +0000, Tvrtko Ursulin wrote:
>>
>> On 22/02/2017 11:46, Chris Wilson wrote:
>>> Add a mock selftest to preempt a request and check that we cancel it,
>>> requeue the request and then complete its execution.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>> drivers/gpu/drm/i915/selftests/i915_gem_request.c | 59 +++++++++++++++++++++++
>>> drivers/gpu/drm/i915/selftests/mock_request.c     | 19 ++++++++
>>> drivers/gpu/drm/i915/selftests/mock_request.h     |  2 +
>>> 3 files changed, 80 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
>>> index 9d056a86723d..c803ef60a127 100644
>>> --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
>>> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
>>> @@ -26,6 +26,7 @@
>>>
>>> #include "../i915_selftest.h"
>>>
>>> +#include "mock_context.h"
>>> #include "mock_gem_device.h"
>>>
>>> static int igt_add_request(void *arg)
>>> @@ -181,12 +182,70 @@ static int igt_fence_wait(void *arg)
>>> 	return err;
>>> }
>>>
>>> +static int igt_request_rewind(void *arg)
>>> +{
>>> +	struct drm_i915_private *i915 = arg;
>>> +	struct drm_i915_gem_request *request, *vip;
>>> +	struct i915_gem_context *ctx[2];
>>> +	int err = -EINVAL;
>>> +
>>> +	mutex_lock(&i915->drm.struct_mutex);
>>> +	ctx[0] = mock_context(i915, "A");
>>> +	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
>>> +	if (!request) {
>>> +		err = -ENOMEM;
>>> +		goto err_device;
>>
>> Leaks ctx[0].
>>
>>> +	}
>>> +	i915_add_request(request);
>>> +
>>> +	ctx[1] = mock_context(i915, "B");
>>> +	vip = mock_request(i915->engine[RCS], ctx[1], 0);
>>> +	if (!vip) {
>>> +		err = -ENOMEM;
>>> +		goto err_locked;
>>
>> Leaks the request.
>
> That's been handed over to the device.
>
>>> +bool mock_cancel_request(struct drm_i915_gem_request *request)
>>> +{
>>> +	struct mock_request *mock = container_of(request, typeof(*mock), base);
>>> +	struct mock_engine *engine =
>>> +		container_of(request->engine, typeof(*engine), base);
>>> +	bool was_queued;
>>> +
>>> +	spin_lock_irq(&engine->hw_lock);
>>> +	was_queued = !list_empty(&mock->link);
>>
>> I suggest a better name for the mock request. Even just req would be
>> better IMO.
>
> A bit late, the pattern has been set for the file. Currently
> request for drm_i915_gem_request and mock for mock_request. I definitely
> don't like the idea of req and request in the same function.

Pattern? Try grep "mock_request *" on selftests/* ! :)

Okay.. I r-b'ed some of those so I'll shut up.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 12/15] drm/i915: Replace reset_wait_queue with default_wake_function
  2017-02-22 11:46 ` [PATCH v2 12/15] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
@ 2017-02-22 14:13   ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 14:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> If we change the wait_queue_t from using the autoremove_wake_function to
> the default_wake_function, we no longer have to restore the wait_queue_t
> entry on the wait_queue_head_t list after being woken up by it, as we
> are unusual in sleeping multiple times on the same wait_queue_t.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 30 +++++++-----------------------
>  1 file changed, 7 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 97116e492d01..42250eaf56ec 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -946,16 +946,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
>  }
>
> -static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
> -{
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&q->lock, flags);
> -	if (list_empty(&wait->task_list))
> -		__add_wait_queue(q, wait);
> -	spin_unlock_irqrestore(&q->lock, flags);
> -}
> -
>  static unsigned long local_clock_us(unsigned int *cpu)
>  {
>  	unsigned long t;
> @@ -1060,8 +1050,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
>  		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
>  	wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue;
> -	DEFINE_WAIT(reset);
> -	DEFINE_WAIT(exec);
> +	DEFINE_WAIT_FUNC(reset, default_wake_function);
> +	DEFINE_WAIT_FUNC(exec, default_wake_function);
>  	struct intel_wait wait;
>
>  	might_sleep();
> @@ -1080,13 +1070,13 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>
>  	trace_i915_gem_request_wait_begin(req, flags);
>
> +	add_wait_queue(&req->execute, &exec);
>  	if (flags & I915_WAIT_LOCKED)
>  		add_wait_queue(errq, &reset);
>
>  	wait.tsk = current;
>
>  restart:
> -	reset_wait_queue(&req->execute, &exec);
>  	wait.seqno = i915_gem_request_global_seqno(req);
>  	if (!wait.seqno) {
>  		do {
> @@ -1100,26 +1090,21 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  			    i915_reset_in_progress(&req->i915->gpu_error)) {
>  				__set_current_state(TASK_RUNNING);
>  				i915_reset(req->i915);
> -				reset_wait_queue(errq, &reset);
>  				continue;
>  			}
>
>  			if (signal_pending_state(state, current)) {
>  				timeout = -ERESTARTSYS;
> -				break;
> +				goto complete;
>  			}
>
>  			if (!timeout) {
>  				timeout = -ETIME;
> -				break;
> +				goto complete;
>  			}
>
>  			timeout = io_schedule_timeout(timeout);
>  		} while (1);
> -		finish_wait(&req->execute, &exec);
> -
> -		if (timeout < 0)
> -			goto complete;
>
>  		GEM_BUG_ON(!wait.seqno);
>  	}
> @@ -1179,7 +1164,6 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		    i915_reset_in_progress(&req->i915->gpu_error)) {
>  			__set_current_state(TASK_RUNNING);
>  			i915_reset(req->i915);
> -			reset_wait_queue(errq, &reset);
>  			continue;
>  		}
>
> @@ -1194,11 +1178,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	}
>
>  	intel_engine_remove_wait(req->engine, &wait);
> -	__set_current_state(TASK_RUNNING);
> -
>  complete:
> +	__set_current_state(TASK_RUNNING);
>  	if (flags & I915_WAIT_LOCKED)
>  		remove_wait_queue(errq, &reset);
> +	remove_wait_queue(&req->execute, &exec);
>  	trace_i915_gem_request_wait_end(req);
>
>  	return timeout;
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters
  2017-02-22 11:46 ` [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
@ 2017-02-22 14:16   ` Tvrtko Ursulin
  2017-02-22 14:26     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 14:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> Combine the common code for the pair of waiters into a single function.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 21 ++++++++++++---------
>  1 file changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 42250eaf56ec..66d292384e60 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1024,6 +1024,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
>  	return false;
>  }
>
> +static bool __i915_reset_request(struct drm_i915_gem_request *request)
> +{
> +	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
> +		return false;
> +
> +	__set_current_state(TASK_RUNNING);
> +	i915_reset(request->i915);
> +	return true;
> +}
> +
>  /**
>   * i915_wait_request - wait until execution of request has finished
>   * @req: the request to wait upon
> @@ -1087,11 +1097,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  				break;
>
>  			if (flags & I915_WAIT_LOCKED &&
> -			    i915_reset_in_progress(&req->i915->gpu_error)) {
> -				__set_current_state(TASK_RUNNING);
> -				i915_reset(req->i915);
> +			    __i915_reset_request(req))
>  				continue;
> -			}
>
>  			if (signal_pending_state(state, current)) {
>  				timeout = -ERESTARTSYS;
> @@ -1160,12 +1167,8 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  		 * to come along and update the breadcrumb (either directly
>  		 * itself, or indirectly by recovering the GPU).
>  		 */
> -		if (flags & I915_WAIT_LOCKED &&
> -		    i915_reset_in_progress(&req->i915->gpu_error)) {
> -			__set_current_state(TASK_RUNNING);
> -			i915_reset(req->i915);
> +		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
>  			continue;
> -		}
>
>  		/* Only spin if we know the GPU is processing this request */
>  		if (i915_spin_request(req, state, 2))
>

Okay in principle just unclear from the code now what it does. I suggest 
__i915_check_and_reset_request or __i915_request_check_and_reset so it 
is obvious the statement not only checks but also does stuff.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 15/15] drm/i915: Remove one level of indention from wait-for-execute
  2017-02-22 11:46 ` [PATCH v2 15/15] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
@ 2017-02-22 14:22   ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 14:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 11:46, Chris Wilson wrote:
> Now that the code is getting simpler, we can reduce the indentation when
> waiting for the global_seqno.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 41 +++++++++++++++------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 00ec3dbca184..d7e72aed1bd3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1089,34 +1089,29 @@ long i915_wait_request(struct drm_i915_gem_request *req,
>  	wait.tsk = current;
>
>  restart:
> -	wait.seqno = i915_gem_request_global_seqno(req);
> -	if (!wait.seqno) {
> -		do {
> -			set_current_state(state);
> -
> -			wait.seqno = i915_gem_request_global_seqno(req);
> -			if (wait.seqno)
> -				break;
> +	do {
> +		set_current_state(state);
> +		wait.seqno = i915_gem_request_global_seqno(req);
> +		if (wait.seqno)
> +			break;
>
> -			if (flags & I915_WAIT_LOCKED &&
> -			    __i915_reset_request(req))
> -				continue;
> +		if (flags & I915_WAIT_LOCKED && __i915_reset_request(req))
> +			continue;
>
> -			if (signal_pending_state(state, current)) {
> -				timeout = -ERESTARTSYS;
> -				goto complete;
> -			}
> +		if (signal_pending_state(state, current)) {
> +			timeout = -ERESTARTSYS;
> +			goto complete;
> +		}
>
> -			if (!timeout) {
> -				timeout = -ETIME;
> -				goto complete;
> -			}
> +		if (!timeout) {
> +			timeout = -ETIME;
> +			goto complete;
> +		}
>
> -			timeout = io_schedule_timeout(timeout);
> -		} while (1);
> +		timeout = io_schedule_timeout(timeout);
> +	} while (1);
>
> -		GEM_BUG_ON(!wait.seqno);
> -	}
> +	GEM_BUG_ON(!wait.seqno);
>  	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
>
>  	/* Optimistic short spin before touching IRQs */
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters
  2017-02-22 14:16   ` Tvrtko Ursulin
@ 2017-02-22 14:26     ` Chris Wilson
  2017-02-22 15:07       ` Tvrtko Ursulin
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 14:26 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Feb 22, 2017 at 02:16:49PM +0000, Tvrtko Ursulin wrote:
> 
> On 22/02/2017 11:46, Chris Wilson wrote:
> >Combine the common code for the pair of waiters into a single function.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> > drivers/gpu/drm/i915/i915_gem_request.c | 21 ++++++++++++---------
> > 1 file changed, 12 insertions(+), 9 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> >index 42250eaf56ec..66d292384e60 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.c
> >@@ -1024,6 +1024,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
> > 	return false;
> > }
> >
> >+static bool __i915_reset_request(struct drm_i915_gem_request *request)
> >+{
> >+	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
> >+		return false;
> >+
> >+	__set_current_state(TASK_RUNNING);
> >+	i915_reset(request->i915);
> >+	return true;
> >+}

> Okay in principle just unclear from the code now what it does. I
> suggest __i915_check_and_reset_request or
> __i915_request_check_and_reset so it is obvious the statement not
> only checks but also does stuff.

__i915_wait_request_check_and_reset ?

Looking at it keeping the i915_wait_request in there to show it's a
dedicated helper will also be sensible.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters
  2017-02-22 14:26     ` Chris Wilson
@ 2017-02-22 15:07       ` Tvrtko Ursulin
  0 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22 15:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/02/2017 14:26, Chris Wilson wrote:
> On Wed, Feb 22, 2017 at 02:16:49PM +0000, Tvrtko Ursulin wrote:
>>
>> On 22/02/2017 11:46, Chris Wilson wrote:
>>> Combine the common code for the pair of waiters into a single function.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>> drivers/gpu/drm/i915/i915_gem_request.c | 21 ++++++++++++---------
>>> 1 file changed, 12 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>>> index 42250eaf56ec..66d292384e60 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
>>> @@ -1024,6 +1024,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
>>> 	return false;
>>> }
>>>
>>> +static bool __i915_reset_request(struct drm_i915_gem_request *request)
>>> +{
>>> +	if (likely(!i915_reset_in_progress(&request->i915->gpu_error)))
>>> +		return false;
>>> +
>>> +	__set_current_state(TASK_RUNNING);
>>> +	i915_reset(request->i915);
>>> +	return true;
>>> +}
>
>> Okay in principle just unclear from the code now what it does. I
>> suggest __i915_check_and_reset_request or
>> __i915_request_check_and_reset so it is obvious the statement not
>> only checks but also does stuff.
>
> __i915_wait_request_check_and_reset ?
>
> Looking at it keeping the i915_wait_request in there to show it's a
> dedicated helper will also be sensible.

Sounds good to me.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* [PATCH v3] drm/i915: Remove the preempted request from the execution queue
  2017-02-22 13:33   ` Tvrtko Ursulin
  2017-02-22 13:40     ` Chris Wilson
@ 2017-02-22 18:53     ` Chris Wilson
  1 sibling, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 18:53 UTC (permalink / raw)
  To: intel-gfx

After the request is cancelled, we then need to remove it from the
global execution timeline and return it to the context timeline, the
inverse of submit_request().

v2: Move manipulation of struct intel_wait to helpers

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c            | 66 ++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_request.h            |  3 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c           | 17 +++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h            | 37 +++++++++++-
 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c |  6 +-
 5 files changed, 117 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index d18f450977e0..76e31cd7840e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_timeline *timeline;
+
+	assert_spin_locked(&engine->timeline->lock);
+
+	/* Only unwind in reverse order, required so that the per-context list
+	 * is kept in seqno/ring order.
+	 */
+	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+	engine->timeline->seqno--;
+
+	/* We may be recursing from the signal callback of another i915 fence */
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	request->global_seqno = 0;
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
+	spin_unlock(&request->lock);
+
+	/* Transfer back from the global per-engine timeline to per-context */
+	timeline = request->timeline;
+	GEM_BUG_ON(timeline == engine->timeline);
+
+	spin_lock(&timeline->lock);
+	list_move(&request->link, &timeline->requests);
+	spin_unlock(&timeline->lock);
+
+	/* We don't need to wake_up any waiters on request->execute, they
+	 * will get woken by any other event or us re-adding this request
+	 * to the engine timeline (__i915_gem_request_submit()). The waiters
+	 * should be quite adept at finding that the request now has a new
+	 * global_seqno to the one they went to sleep on.
+	 */
+}
+
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
+{
+	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
+
+	/* Will be called from irq-context when using foreign fences. */
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+
+	__i915_gem_request_unsubmit(request);
+
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -1034,15 +1083,15 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(errq, &reset);
 
-	intel_wait_init(&wait, i915_gem_request_global_seqno(req));
+	intel_wait_init(&wait);
 
+restart:
 	reset_wait_queue(&req->execute, &exec);
-	if (!wait.seqno) {
+	if (!intel_wait_update_request(&wait, req)) {
 		do {
 			set_current_state(state);
 
-			wait.seqno = i915_gem_request_global_seqno(req);
-			if (wait.seqno)
+			if (intel_wait_update_request(&wait, req))
 				break;
 
 			if (flags & I915_WAIT_LOCKED &&
@@ -1070,7 +1119,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		if (timeout < 0)
 			goto complete;
 
-		GEM_BUG_ON(!wait.seqno);
+		GEM_BUG_ON(!intel_wait_has_seqno(&wait));
 	}
 	GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit));
 
@@ -1100,7 +1149,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		timeout = io_schedule_timeout(timeout);
 
 		if (intel_wait_complete(&wait) &&
-		    i915_gem_request_global_seqno(req) == wait.seqno)
+		    intel_wait_check_request(&wait, req))
 			break;
 
 		set_current_state(state);
@@ -1135,6 +1184,11 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
+
+		if (!intel_wait_check_request(&wait, req)) {
+			intel_engine_remove_wait(req->engine, &wait);
+			goto restart;
+		}
 	}
 
 	intel_engine_remove_wait(req->engine, &wait);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index b81f6709905c..5f73d8c0a38a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 void __i915_gem_request_submit(struct drm_i915_gem_request *request);
 void i915_gem_request_submit(struct drm_i915_gem_request *request);
 
+void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
+
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 46e7fca4b189..ac8663d8e73f 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -453,7 +453,12 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->lock);
 }
 
-static bool signal_complete(struct drm_i915_gem_request *request)
+static bool signal_valid(const struct drm_i915_gem_request *request)
+{
+	return intel_wait_check_request(&request->signaling.wait, request);
+}
+
+static bool signal_complete(const struct drm_i915_gem_request *request)
 {
 	if (!request)
 		return false;
@@ -462,7 +467,7 @@ static bool signal_complete(struct drm_i915_gem_request *request)
 	 * signalled that this wait is already completed.
 	 */
 	if (intel_wait_complete(&request->signaling.wait))
-		return true;
+		return signal_valid(request);
 
 	/* Carefully check if the request is complete, giving time for the
 	 * seqno to be visible or if the GPU hung.
@@ -542,13 +547,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 
 			i915_gem_request_put(request);
 		} else {
+			DEFINE_WAIT(exec);
+
 			if (kthread_should_stop()) {
 				GEM_BUG_ON(request);
 				break;
 			}
 
+			if (request)
+				add_wait_queue(&request->execute, &exec);
+
 			schedule();
 
+			if (request)
+				remove_wait_queue(&request->execute, &exec);
+
 			if (kthread_should_park())
 				kthread_parkme();
 		}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 45d2c2fa946e..fe724d477362 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -582,10 +582,45 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
+static inline void intel_wait_init(struct intel_wait *wait)
 {
 	wait->tsk = current;
+}
+
+static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
+{
+	wait->tsk = current;
+	wait->seqno = seqno;
+}
+
+static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
+{
+	return wait->seqno;
+}
+
+static inline bool intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
+{
 	wait->seqno = seqno;
+	return intel_wait_has_seqno(wait);
+}
+
+static inline bool intel_wait_update_request(struct intel_wait *wait,
+					     struct drm_i915_gem_request *rq)
+{
+	return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
+}
+
+static inline bool intel_wait_check_seqno(const struct intel_wait *wait,
+					  u32 seqno)
+{
+	return wait->seqno == seqno;
+}
+
+static inline bool
+intel_wait_check_request(const struct intel_wait *wait,
+			 const struct drm_i915_gem_request *rq)
+{
+	return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
 }
 
 static inline bool intel_wait_complete(const struct intel_wait *wait)
diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
index 6426acc9fdca..621be1ca53d8 100644
--- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
@@ -131,7 +131,7 @@ static int igt_random_insert_remove(void *arg)
 		goto out_bitmap;
 
 	for (n = 0; n < count; n++)
-		intel_wait_init(&waiters[n], seqno_bias + n);
+		intel_wait_init_for_seqno(&waiters[n], seqno_bias + n);
 
 	err = check_rbtree(engine, bitmap, waiters, count);
 	if (err)
@@ -197,7 +197,7 @@ static int igt_insert_complete(void *arg)
 		goto out_waiters;
 
 	for (n = 0; n < count; n++) {
-		intel_wait_init(&waiters[n], n + seqno_bias);
+		intel_wait_init_for_seqno(&waiters[n], n + seqno_bias);
 		intel_engine_add_wait(engine, &waiters[n]);
 		__set_bit(n, bitmap);
 	}
@@ -318,7 +318,7 @@ static int igt_wakeup_thread(void *arg)
 	while (wait_for_ready(w)) {
 		GEM_BUG_ON(kthread_should_stop());
 
-		intel_wait_init(&wait, w->seqno);
+		intel_wait_init_for_seqno(&wait, w->seqno);
 		intel_engine_add_wait(w->engine, &wait);
 		for (;;) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2)
  2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
                   ` (15 preceding siblings ...)
  2017-02-22 13:22 ` ✗ Fi.CI.BAT: failure for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine Patchwork
@ 2017-02-22 20:52 ` Patchwork
  2017-02-22 20:56   ` Chris Wilson
  16 siblings, 1 reply; 39+ messages in thread
From: Patchwork @ 2017-02-22 20:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2)
URL   : https://patchwork.freedesktop.org/series/20056/
State : warning

== Summary ==

Series 20056v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/20056/revisions/2/mbox/

Test gem_exec_parallel:
        Subgroup basic:
                pass       -> DMESG-WARN (fi-skl-6700hq)
                pass       -> DMESG-WARN (fi-skl-6260u)
                pass       -> DMESG-WARN (fi-skl-6700k)
                pass       -> DMESG-WARN (fi-kbl-7500u)
                pass       -> DMESG-WARN (fi-bxt-j4205)
                pass       -> DMESG-WARN (fi-bdw-5557u)
Test gem_ringfill:
        Subgroup basic-default-interruptible:
                pass       -> DMESG-WARN (fi-skl-6770hq)
                pass       -> DMESG-WARN (fi-ivb-3770)
                pass       -> DMESG-WARN (fi-skl-6260u)
                pass       -> DMESG-WARN (fi-hsw-4770r)
                pass       -> DMESG-WARN (fi-skl-6700hq)
                pass       -> DMESG-WARN (fi-bdw-5557u)
                pass       -> DMESG-WARN (fi-snb-2520m)
                pass       -> DMESG-WARN (fi-bxt-j4205)
                pass       -> DMESG-WARN (fi-kbl-7500u)
                pass       -> DMESG-WARN (fi-skl-6700k)
                pass       -> DMESG-WARN (fi-ivb-3520m)
                pass       -> DMESG-WARN (fi-hsw-4770)
                pass       -> DMESG-WARN (fi-snb-2600)
Test gem_sync:
        Subgroup basic-many-each:
                pass       -> DMESG-WARN (fi-snb-2520m)
                pass       -> DMESG-WARN (fi-ivb-3770)
                pass       -> DMESG-WARN (fi-skl-6700k)
        Subgroup basic-store-all:
                pass       -> DMESG-WARN (fi-skl-6700hq)
                pass       -> DMESG-WARN (fi-skl-6260u)
                pass       -> DMESG-WARN (fi-skl-6700k)
                pass       -> DMESG-WARN (fi-kbl-7500u)
                pass       -> DMESG-WARN (fi-bxt-j4205)
                pass       -> DMESG-WARN (fi-skl-6770hq)
                pass       -> DMESG-WARN (fi-bdw-5557u)
Test gem_wait:
        Subgroup basic-await-all:
                pass       -> DMESG-WARN (fi-skl-6700hq)
                pass       -> DMESG-WARN (fi-ivb-3770)
                pass       -> DMESG-WARN (fi-byt-n2820)
                pass       -> DMESG-WARN (fi-hsw-4770r)
                pass       -> DMESG-WARN (fi-skl-6260u)
                pass       -> DMESG-WARN (fi-skl-6770hq)
                pass       -> DMESG-WARN (fi-bsw-n3050)
                pass       -> DMESG-WARN (fi-bdw-5557u)
                pass       -> DMESG-WARN (fi-ilk-650)
                pass       -> DMESG-WARN (fi-snb-2520m)
                pass       -> DMESG-WARN (fi-byt-j1900)
                pass       -> DMESG-WARN (fi-kbl-7500u)
                pass       -> DMESG-WARN (fi-bxt-j4205)
                pass       -> DMESG-WARN (fi-skl-6700k)
                pass       -> DMESG-WARN (fi-ivb-3520m)
                pass       -> DMESG-WARN (fi-hsw-4770)
                pass       -> DMESG-WARN (fi-snb-2600)
        Subgroup basic-wait-all:
                pass       -> DMESG-WARN (fi-skl-6700hq)
                pass       -> DMESG-WARN (fi-ivb-3770)
                pass       -> DMESG-WARN (fi-byt-n2820)
                pass       -> DMESG-WARN (fi-hsw-4770r)
                pass       -> DMESG-WARN (fi-skl-6260u)
                pass       -> DMESG-WARN (fi-skl-6770hq)
                pass       -> DMESG-WARN (fi-bsw-n3050)
                pass       -> DMESG-WARN (fi-bdw-5557u)
                pass       -> DMESG-WARN (fi-ilk-650)
                pass       -> DMESG-WARN (fi-snb-2520m)
                pass       -> DMESG-WARN (fi-byt-j1900)
                pass       -> DMESG-WARN (fi-kbl-7500u)
                pass       -> DMESG-WARN (fi-bxt-j4205)
                pass       -> DMESG-WARN (fi-skl-6700k)
                pass       -> DMESG-WARN (fi-ivb-3520m)
                pass       -> DMESG-WARN (fi-hsw-4770)
                pass       -> DMESG-WARN (fi-snb-2600)

fi-bdw-5557u     total:253  pass:237  dwarn:5   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:253  pass:212  dwarn:2   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:253  pass:229  dwarn:5   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:253  pass:224  dwarn:2   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:253  pass:220  dwarn:2   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:253  pass:234  dwarn:3   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:253  pass:234  dwarn:3   dfail:0   fail:0   skip:16 
fi-ilk-650       total:253  pass:201  dwarn:2   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:253  pass:232  dwarn:3   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:253  pass:231  dwarn:4   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:253  pass:230  dwarn:5   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:253  pass:238  dwarn:5   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:253  pass:231  dwarn:5   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:253  pass:225  dwarn:10  dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:253  pass:239  dwarn:4   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:253  pass:221  dwarn:4   dfail:0   fail:0   skip:28 
fi-snb-2600      total:253  pass:221  dwarn:3   dfail:0   fail:0   skip:29 

bf89ec45d0822835b03910371ac0baf46c4efa2d drm-tip: 2017y-02m-22d-14h-30m-04s UTC integration manifest
66e8567 drm/i915: Exercise request cancellation using a mock selftest
8616e1a drm/i915: Remove the preempted request from the execution queue
84fa2fe drm/i915: Allow an request to be cancelled
f7bc470 drm/i915: Take a reference whilst processing the signaler request
4952d9d drm/i915: Protect the request->global_seqno with the engine->timeline lock
008c707 drm/i915: Deconstruct execute fence
86ca3ec drm/i915: Inline __i915_gem_request_wait_for_execute()
64a457f drm/i915: Add ourselves to the gpu error waitqueue for the entire wait
8050bbb drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue
eaed070 drm/i915: Move reserve_seqno() next to unreserve_seqno()
6d274fc drm/i915: Keep a global seqno per-engine

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3938/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: ✗ Fi.CI.BAT: warning for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2)
  2017-02-22 20:52 ` ✗ Fi.CI.BAT: warning for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2) Patchwork
@ 2017-02-22 20:56   ` Chris Wilson
  0 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2017-02-22 20:56 UTC (permalink / raw)
  To: intel-gfx

On Wed, Feb 22, 2017 at 08:52:45PM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2)
> URL   : https://patchwork.freedesktop.org/series/20056/
> State : warning
> 
> == Summary ==
> 
> Series 20056v2 Series without cover letter
> https://patchwork.freedesktop.org/api/1.0/series/20056/revisions/2/mbox/
> 
> Test gem_exec_parallel:
>         Subgroup basic:
>                 pass       -> DMESG-WARN (fi-skl-6700hq)
>                 pass       -> DMESG-WARN (fi-skl-6260u)
>                 pass       -> DMESG-WARN (fi-skl-6700k)
>                 pass       -> DMESG-WARN (fi-kbl-7500u)
>                 pass       -> DMESG-WARN (fi-bxt-j4205)
>                 pass       -> DMESG-WARN (fi-bdw-5557u)

Uh oh, the series is not bisect clean.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2017-02-22 20:57 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-22 11:45 Pre-emption pre-enablement Chris Wilson
2017-02-22 11:45 ` [PATCH v2 01/15] drm/i915: Keep a global seqno per-engine Chris Wilson
2017-02-22 12:24   ` Tvrtko Ursulin
2017-02-22 11:45 ` [PATCH v2 02/15] drm/i915: Move reserve_seqno() next to unreserve_seqno() Chris Wilson
2017-02-22 12:23   ` Joonas Lahtinen
2017-02-22 11:45 ` [PATCH v2 03/15] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Chris Wilson
2017-02-22 11:45 ` [PATCH v2 04/15] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Chris Wilson
2017-02-22 11:46 ` [PATCH v2 05/15] drm/i915: Inline __i915_gem_request_wait_for_execute() Chris Wilson
2017-02-22 11:46 ` [PATCH v2 06/15] drm/i915: Deconstruct execute fence Chris Wilson
2017-02-22 11:46 ` [PATCH v2 07/15] drm/i915: Protect the request->global_seqno with the engine->timeline lock Chris Wilson
2017-02-22 12:29   ` Tvrtko Ursulin
2017-02-22 12:45     ` Chris Wilson
2017-02-22 13:13       ` Tvrtko Ursulin
2017-02-22 11:46 ` [PATCH v2 08/15] drm/i915: Take a reference whilst processing the signaler request Chris Wilson
2017-02-22 12:35   ` Tvrtko Ursulin
2017-02-22 11:46 ` [PATCH v2 09/15] drm/i915: Allow an request to be cancelled Chris Wilson
2017-02-22 13:08   ` Tvrtko Ursulin
2017-02-22 11:46 ` [PATCH v2 10/15] drm/i915: Remove the preempted request from the execution queue Chris Wilson
2017-02-22 13:33   ` Tvrtko Ursulin
2017-02-22 13:40     ` Chris Wilson
2017-02-22 13:50       ` Tvrtko Ursulin
2017-02-22 18:53     ` [PATCH v3] " Chris Wilson
2017-02-22 11:46 ` [PATCH v2 11/15] drm/i915: Exercise request cancellation using a mock selftest Chris Wilson
2017-02-22 13:46   ` Tvrtko Ursulin
2017-02-22 13:59     ` Chris Wilson
2017-02-22 14:03       ` Chris Wilson
2017-02-22 14:05       ` Tvrtko Ursulin
2017-02-22 11:46 ` [PATCH v2 12/15] drm/i915: Replace reset_wait_queue with default_wake_function Chris Wilson
2017-02-22 14:13   ` Tvrtko Ursulin
2017-02-22 11:46 ` [PATCH v2 13/15] drm/i915: Refactor direct GPU reset from request waiters Chris Wilson
2017-02-22 14:16   ` Tvrtko Ursulin
2017-02-22 14:26     ` Chris Wilson
2017-02-22 15:07       ` Tvrtko Ursulin
2017-02-22 11:46 ` [PATCH v2 14/15] drm/i915: Immediately process a reset before starting waiting Chris Wilson
2017-02-22 11:46 ` [PATCH v2 15/15] drm/i915: Remove one level of indention from wait-for-execute Chris Wilson
2017-02-22 14:22   ` Tvrtko Ursulin
2017-02-22 13:22 ` ✗ Fi.CI.BAT: failure for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine Patchwork
2017-02-22 20:52 ` ✗ Fi.CI.BAT: warning for series starting with [v2,01/15] drm/i915: Keep a global seqno per-engine (rev2) Patchwork
2017-02-22 20:56   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.