All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests
@ 2019-08-12 13:38 Chris Wilson
  2019-08-12 13:38 ` [PATCH 02/18] drm/i915: Push the wakeref->count deferral to the backend Chris Wilson
                   ` (22 more replies)
  0 siblings, 23 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:38 UTC (permalink / raw)
  To: intel-gfx

Since execlista and the guc have diverged in their port tracking, we
cannot simply reuse the execlists cancellation code as it leads to
unbalanced reference counting. Use a local simpler routine for the guc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 ---
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  6 ++---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 +++++++++++--------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index e1228b0e577f..4b6a1cf80706 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -136,9 +136,6 @@ execlists_active(const struct intel_engine_execlists *execlists)
 	return READ_ONCE(*execlists->active);
 }
 
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
-
 struct i915_request *
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b97047d58d3d..5c26c4ae139b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1297,8 +1297,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	}
 }
 
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
 	struct i915_request * const *port, *rq;
 
@@ -2355,7 +2355,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 
 unwind:
 	/* Push back any incomplete requests for replay after the reset. */
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 	__unwind_incomplete_requests(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 449ca6357018..a37afc6266ec 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -517,11 +517,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
 	trace_i915_request_in(rq, idx);
 
-	if (!rq->hw_context->inflight)
-		rq->hw_context->inflight = rq->engine;
-	intel_context_inflight_inc(rq->hw_context);
 	intel_gt_pm_get(rq->engine->gt);
-
 	return i915_request_get(rq);
 }
 
@@ -529,10 +525,6 @@ static void schedule_out(struct i915_request *rq)
 {
 	trace_i915_request_out(rq);
 
-	intel_context_inflight_dec(rq->hw_context);
-	if (!intel_context_inflight_count(rq->hw_context))
-		rq->hw_context->inflight = NULL;
-
 	intel_gt_pm_put(rq->engine->gt);
 	i915_request_put(rq);
 }
@@ -636,6 +628,17 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
 	__tasklet_disable_sync_once(&execlists->tasklet);
 }
 
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
+{
+	struct i915_request * const *port, *rq;
+
+	for (port = execlists->active; (rq = *port); port++)
+		schedule_out(rq);
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+}
+
 static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -644,7 +647,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
 	rq = execlists_unwind_incomplete_requests(execlists);
@@ -687,7 +690,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 02/18] drm/i915: Push the wakeref->count deferral to the backend
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
@ 2019-08-12 13:38 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 03/18] drm/i915/gt: Save/restore interrupts around breadcrumb disable Chris Wilson
                   ` (21 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:38 UTC (permalink / raw)
  To: intel-gfx

If the backend wishes to defer the wakeref parking, make it responsible
for unlocking the wakeref (i.e. bumping the counter). This allows it to
time the unlock much more carefully in case it happens to needs the
wakeref to be active during its deferral.

For instance, during engine parking we may choose to emit an idle
barrier (a request). To do so, we borrow the engine->kernel_context
timeline and to ensure exclusive access we keep the
engine->wakeref.count as 0. However, to submit that request to HW may
require a intel_engine_pm_get() (e.g. to keep the submission tasklet
alive) and before we allow that we have to rewake our wakeref to avoid a
recursive deadlock.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c |  8 ++-
 drivers/gpu/drm/i915/i915_request.c       | 66 ++++++++++++-----------
 drivers/gpu/drm/i915/i915_request.h       |  2 +
 drivers/gpu/drm/i915/i915_scheduler.c     |  3 +-
 drivers/gpu/drm/i915/intel_wakeref.c      |  4 +-
 drivers/gpu/drm/i915/intel_wakeref.h      | 11 ++++
 6 files changed, 56 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 6b15e3335dd6..ad37c9808c1f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -68,9 +68,13 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
-
 	i915_request_add_active_barriers(rq);
+
+	rq->sched.attr.priority = INT_MAX; /* Preemption barrier */
+
 	__i915_request_commit(rq);
+	__intel_wakeref_defer_park(&engine->wakeref);
+	__i915_request_queue(rq, NULL);
 
 	return false;
 }
@@ -98,7 +102,7 @@ static int __engine_park(struct intel_wakeref *wf)
 	intel_engine_pool_park(&engine->pool);
 
 	/* Must be reset upon idling, or we may miss the busy wakeup. */
-	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
+	engine->execlists.queue_priority_hint = INT_MIN;
 
 	if (engine->park)
 		engine->park(engine);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 43175bada09e..4703aab3ae21 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 		list_add(&ring->active_link, &rq->i915->gt.active_rings);
 	rq->emitted_jiffies = jiffies;
 
+	return prev;
+}
+
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr)
+{
 	/*
 	 * Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
@@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	local_bh_disable();
 	i915_sw_fence_commit(&rq->semaphore);
-	if (engine->schedule) {
-		struct i915_sched_attr attr = rq->gem_context->sched;
-
-		/*
-		 * Boost actual workloads past semaphores!
-		 *
-		 * With semaphores we spin on one engine waiting for another,
-		 * simply to reduce the latency of starting our work when
-		 * the signaler completes. However, if there is any other
-		 * work that we could be doing on this engine instead, that
-		 * is better utilisation and will reduce the overall duration
-		 * of the current work. To avoid PI boosting a semaphore
-		 * far in the distance past over useful work, we keep a history
-		 * of any semaphore use along our dependency chain.
-		 */
-		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
-			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
-
-		/*
-		 * Boost priorities to new clients (new request flows).
-		 *
-		 * Allow interactive/synchronous clients to jump ahead of
-		 * the bulk clients. (FQ_CODEL)
-		 */
-		if (list_empty(&rq->sched.signalers_list))
-			attr.priority |= I915_PRIORITY_WAIT;
-
-		engine->schedule(rq, &attr);
-	}
+	if (attr && rq->engine->schedule)
+		rq->engine->schedule(rq, attr);
 	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
-
-	return prev;
 }
 
 void i915_request_add(struct i915_request *rq)
 {
+	struct i915_sched_attr attr = rq->gem_context->sched;
 	struct i915_request *prev;
 
 	lockdep_assert_held(&rq->timeline->mutex);
@@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq)
 
 	prev = __i915_request_commit(rq);
 
+	/*
+	 * Boost actual workloads past semaphores!
+	 *
+	 * With semaphores we spin on one engine waiting for another,
+	 * simply to reduce the latency of starting our work when
+	 * the signaler completes. However, if there is any other
+	 * work that we could be doing on this engine instead, that
+	 * is better utilisation and will reduce the overall duration
+	 * of the current work. To avoid PI boosting a semaphore
+	 * far in the distance past over useful work, we keep a history
+	 * of any semaphore use along our dependency chain.
+	 */
+	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
+	/*
+	 * Boost priorities to new clients (new request flows).
+	 *
+	 * Allow interactive/synchronous clients to jump ahead of
+	 * the bulk clients. (FQ_CODEL)
+	 */
+	if (list_empty(&rq->sched.signalers_list))
+		attr.priority |= I915_PRIORITY_WAIT;
+
+	__i915_request_queue(rq, &attr);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 313df3c37158..fec1d5f17c94 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -251,6 +251,8 @@ struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);
 
 struct i915_request *__i915_request_commit(struct i915_request *request);
+void __i915_request_queue(struct i915_request *rq,
+			  const struct i915_sched_attr *attr);
 
 void i915_request_retire_upto(struct i915_request *rq);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 0bd452e851d8..7b84ebca2901 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
-
-	if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
+	if (READ_ONCE(rq->sched.attr.priority) & bump)
 		return;
 
 	spin_lock_irqsave(&schedule_lock, flags);
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index d4443e81c1c8..868cc78048d0 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
 	if (!atomic_dec_and_test(&wf->count))
 		goto unlock;
 
+	/* ops->put() must reschedule its own release on error/deferral */
 	if (likely(!wf->ops->put(wf))) {
 		rpm_put(wf);
 		wake_up_var(&wf->wakeref);
-	} else {
-		/* ops->put() must schedule its own release on deferral */
-		atomic_set_release(&wf->count, 1);
 	}
 
 unlock:
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 535a3a12864b..5f0c972a80fb 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
 	return READ_ONCE(wf->wakeref);
 }
 
+/**
+ * __intel_wakeref_defer_park: Defer the current park callback
+ * @wf: the wakeref
+ */
+static inline void
+__intel_wakeref_defer_park(struct intel_wakeref *wf)
+{
+	INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
+	atomic_set_release(&wf->count, 1);
+}
+
 /**
  * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
  * @wf: the wakeref
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 03/18] drm/i915/gt: Save/restore interrupts around breadcrumb disable
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
  2019-08-12 13:38 ` [PATCH 02/18] drm/i915: Push the wakeref->count deferral to the backend Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 04/18] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
                   ` (20 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Stop assuming we only get called with irqs-on for disarming the
breadcrumbs, and do a full save/restore spin_lock_irq.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index e1bbc9b428cd..90db41d173df 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -67,14 +67,15 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
 
 	if (!b->irq_armed)
 		return;
 
-	spin_lock_irq(&b->irq_lock);
+	spin_lock_irqsave(&b->irq_lock, flags);
 	if (b->irq_armed)
 		__intel_breadcrumbs_disarm_irq(b);
-	spin_unlock_irq(&b->irq_lock);
+	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
 static inline bool __request_completed(const struct i915_request *rq)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 04/18] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
  2019-08-12 13:38 ` [PATCH 02/18] drm/i915: Push the wakeref->count deferral to the backend Chris Wilson
  2019-08-12 13:39 ` [PATCH 03/18] drm/i915/gt: Save/restore interrupts around breadcrumb disable Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 05/18] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
                   ` (19 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

If we only call process_csb() from the tasklet, though we lose the
ability to bypass ksoftirqd interrupt processing on direct submission
paths, we can push it out of the irq-off spinlock.

The penalty is that we then allow schedule_out to be called concurrently
with schedule_in requiring us to handle the usage count (baked into the
pointer itself) atomically.

As we do kick the tasklets (via local_bh_enable()) after our submission,
there is a possibility there to see if we can pull the local softirq
processing back from the ksoftirqd.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 130 +++++++++++-------
 drivers/gpu/drm/i915/i915_utils.h             |  20 ++-
 4 files changed, 94 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a632b20ec4d8..d8ce266c049f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -41,9 +41,7 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
-#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
-#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
+#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context *gem_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c7b241417ee1..13a569907c3d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1459,7 +1459,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (intel_context_inflight_count(rq->hw_context) == 1)
+			if (!intel_context_inflight_count(rq->hw_context))
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5c26c4ae139b..945f3acc2e75 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
+
+	intel_context_get(ce);
+
+	intel_gt_pm_get(engine->gt);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(engine);
+
+	return engine;
+}
+
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context *ce = rq->hw_context;
-	int count;
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *old;
 
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 	trace_i915_request_in(rq, idx);
 
-	count = intel_context_inflight_count(ce);
-	if (!count) {
-		intel_context_get(ce);
-		ce->inflight = rq->engine;
-
-		intel_gt_pm_get(ce->inflight->gt);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-		intel_engine_context_in(ce->inflight);
-	}
+	old = READ_ONCE(ce->inflight);
+	do {
+		if (!old) {
+			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			break;
+		}
+	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 
-	intel_context_inflight_inc(ce);
 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
 	return i915_request_get(rq);
 }
 
@@ -581,35 +593,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 }
 
 static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq)
 {
-	struct intel_context *ce = rq->hw_context;
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
 
-	GEM_BUG_ON(!intel_context_inflight_count(ce));
+	intel_engine_context_out(engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	intel_gt_pm_put(engine->gt);
 
-	trace_i915_request_out(rq);
+	/*
+	 * If this is part of a virtual engine, its next request may
+	 * have been blocked waiting for access to the active context.
+	 * We have to kick all the siblings again in case we need to
+	 * switch (e.g. the next request is not runnable on this
+	 * engine). Hopefully, we will already have submitted the next
+	 * request before the tasklet runs and do not need to rebuild
+	 * each virtual tree and kick everyone again.
+	 */
+	if (ce->engine != engine)
+		kick_siblings(rq, ce);
 
-	intel_context_inflight_dec(ce);
-	if (!intel_context_inflight_count(ce)) {
-		intel_engine_context_out(ce->inflight);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-		intel_gt_pm_put(ce->inflight->gt);
+	intel_context_put(ce);
+}
 
-		/*
-		 * If this is part of a virtual engine, its next request may
-		 * have been blocked waiting for access to the active context.
-		 * We have to kick all the siblings again in case we need to
-		 * switch (e.g. the next request is not runnable on this
-		 * engine). Hopefully, we will already have submitted the next
-		 * request before the tasklet runs and do not need to rebuild
-		 * each virtual tree and kick everyone again.
-		 */
-		ce->inflight = NULL;
-		if (rq->engine != ce->engine)
-			kick_siblings(rq, ce);
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *cur, *old;
 
-		intel_context_put(ce);
-	}
+	trace_i915_request_out(rq);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+	old = READ_ONCE(ce->inflight);
+	do
+		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+	while (!try_cmpxchg(&ce->inflight, &old, cur));
+	if (!cur)
+		__execlists_schedule_out(rq);
 
 	i915_request_put(rq);
 }
@@ -684,6 +706,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
+	if (!execlists->pending[0])
+		return false;
+
 	if (execlists->pending[execlists_num_ports(execlists)])
 		return false;
 
@@ -944,9 +969,21 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 static bool
 enable_timeslice(struct intel_engine_cs *engine)
 {
-	struct i915_request *last = last_active(&engine->execlists);
+	struct i915_request * const *port;
+	int hint;
+
+	port = engine->execlists.active;
+	while (port[0] && i915_request_completed(port[0]))
+		port++;
+	if (!port[0])
+		return false;
 
-	return last && need_timeslice(engine, last);
+	hint = engine->execlists.queue_priority_hint;
+	if (port[1])
+		hint = max(rq_prio(port[1]), hint);
+
+	/* Compare the two end-points as an unlocked approximation */
+	return hint >= effective_prio(port[0]);
 }
 
 static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1356,7 +1393,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->active.lock);
 	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
@@ -1427,15 +1463,14 @@ static void process_csb(struct intel_engine_cs *engine)
 				       execlists->pending,
 				       execlists_num_ports(execlists) *
 				       sizeof(*execlists->pending));
-			execlists->pending[0] = NULL;
-
-			trace_ports(execlists, "promoted", execlists->active);
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
+
+			WRITE_ONCE(execlists->pending[0], NULL);
 			break;
 
 		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1479,8 +1514,6 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-
-	process_csb(engine);
 	if (!engine->execlists.pending[0])
 		execlists_dequeue(engine);
 }
@@ -1494,9 +1527,12 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->active.lock, flags);
-	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->active.lock, flags);
+	process_csb(engine);
+	if (!engine->execlists.pending[0]) {
+		spin_lock_irqsave(&engine->active.lock, flags);
+		__execlists_submission_tasklet(engine);
+		spin_unlock_irqrestore(&engine->active.lock, flags);
+	}
 }
 
 static void execlists_submission_timer(struct timer_list *timer)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d652ba5d2320..562f756da421 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
-#define ptr_count_dec(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(--__v);					\
-} while (0)
-
-#define ptr_count_inc(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(++__v);					\
-} while (0)
+#define ptr_dec(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v - 1);						\
+})
+
+#define ptr_inc(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v + 1);						\
+})
 
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 05/18] drm/i915/gt: Track timeline activeness in enter/exit
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (2 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 04/18] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 06/18] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
                   ` (18 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Lift moving the timeline to/from the active_list on enter/exit in order
to shorten the active tracking span in comparison to the existing
pin/unpin.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c        |  1 -
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  4 +
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 98 +++++++------------
 drivers/gpu/drm/i915/gt/intel_timeline.h      |  3 +-
 .../gpu/drm/i915/gt/intel_timeline_types.h    | 18 ++++
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 -
 8 files changed, 64 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 17e3618241c5..92e53c25424c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -37,7 +37,6 @@ static void i915_gem_park(struct drm_i915_private *i915)
 	for_each_engine(engine, i915, id)
 		call_idle_barriers(engine); /* cleanup after wedging */
 
-	intel_timelines_park(i915);
 	i915_vma_parked(i915);
 
 	i915_globals_park();
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 77833f1558a9..9114953bf920 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -280,10 +280,12 @@ int __init i915_global_context_init(void)
 void intel_context_enter_engine(struct intel_context *ce)
 {
 	intel_engine_pm_get(ce->engine);
+	intel_timeline_enter(ce->timeline);
 }
 
 void intel_context_exit_engine(struct intel_context *ce)
 {
+	intel_timeline_exit(ce->timeline);
 	intel_engine_pm_put(ce->engine);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ad37c9808c1f..d6a00a04ed6d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -66,6 +66,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 		/* Context switch failed, hope for the best! Maybe reset? */
 		return true;
 
+	intel_timeline_enter(rq->timeline);
+
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
 	i915_request_add_active_barriers(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 945f3acc2e75..b5618e6c1361 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3255,6 +3255,8 @@ static void virtual_context_enter(struct intel_context *ce)
 
 	for (n = 0; n < ve->num_siblings; n++)
 		intel_engine_pm_get(ve->siblings[n]);
+
+	intel_timeline_enter(ce->timeline);
 }
 
 static void virtual_context_exit(struct intel_context *ce)
@@ -3262,6 +3264,8 @@ static void virtual_context_exit(struct intel_context *ce)
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 	unsigned int n;
 
+	intel_timeline_exit(ce->timeline);
+
 	for (n = 0; n < ve->num_siblings; n++)
 		intel_engine_pm_put(ve->siblings[n]);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 6daa9eb59e19..4af0b9801d91 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -278,64 +278,11 @@ void intel_timelines_init(struct drm_i915_private *i915)
 	timelines_init(&i915->gt);
 }
 
-static void timeline_add_to_active(struct intel_timeline *tl)
-{
-	struct intel_gt_timelines *gt = &tl->gt->timelines;
-
-	mutex_lock(&gt->mutex);
-	list_add(&tl->link, &gt->active_list);
-	mutex_unlock(&gt->mutex);
-}
-
-static void timeline_remove_from_active(struct intel_timeline *tl)
-{
-	struct intel_gt_timelines *gt = &tl->gt->timelines;
-
-	mutex_lock(&gt->mutex);
-	list_del(&tl->link);
-	mutex_unlock(&gt->mutex);
-}
-
-static void timelines_park(struct intel_gt *gt)
-{
-	struct intel_gt_timelines *timelines = &gt->timelines;
-	struct intel_timeline *timeline;
-
-	mutex_lock(&timelines->mutex);
-	list_for_each_entry(timeline, &timelines->active_list, link) {
-		/*
-		 * All known fences are completed so we can scrap
-		 * the current sync point tracking and start afresh,
-		 * any attempt to wait upon a previous sync point
-		 * will be skipped as the fence was signaled.
-		 */
-		i915_syncmap_free(&timeline->sync);
-	}
-	mutex_unlock(&timelines->mutex);
-}
-
-/**
- * intel_timelines_park - called when the driver idles
- * @i915: the drm_i915_private device
- *
- * When the driver is completely idle, we know that all of our sync points
- * have been signaled and our tracking is then entirely redundant. Any request
- * to wait upon an older sync point will be completed instantly as we know
- * the fence is signaled and therefore we will not even look them up in the
- * sync point map.
- */
-void intel_timelines_park(struct drm_i915_private *i915)
-{
-	timelines_park(&i915->gt);
-}
-
 void intel_timeline_fini(struct intel_timeline *timeline)
 {
 	GEM_BUG_ON(timeline->pin_count);
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
-	i915_syncmap_free(&timeline->sync);
-
 	if (timeline->hwsp_cacheline)
 		cacheline_free(timeline->hwsp_cacheline);
 	else
@@ -370,6 +317,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
 	if (tl->pin_count++)
 		return 0;
 	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(tl->active_count);
 
 	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
@@ -380,7 +328,6 @@ int intel_timeline_pin(struct intel_timeline *tl)
 		offset_in_page(tl->hwsp_offset);
 
 	cacheline_acquire(tl->hwsp_cacheline);
-	timeline_add_to_active(tl);
 
 	return 0;
 
@@ -389,6 +336,40 @@ int intel_timeline_pin(struct intel_timeline *tl)
 	return err;
 }
 
+void intel_timeline_enter(struct intel_timeline *tl)
+{
+	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+	GEM_BUG_ON(!tl->pin_count);
+	if (tl->active_count++)
+		return;
+	GEM_BUG_ON(!tl->active_count); /* overflow? */
+
+	mutex_lock(&timelines->mutex);
+	list_add(&tl->link, &timelines->active_list);
+	mutex_unlock(&timelines->mutex);
+}
+
+void intel_timeline_exit(struct intel_timeline *tl)
+{
+	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+	GEM_BUG_ON(!tl->active_count);
+	if (--tl->active_count)
+		return;
+
+	mutex_lock(&timelines->mutex);
+	list_del(&tl->link);
+	mutex_unlock(&timelines->mutex);
+
+	/*
+	 * Since this timeline is idle, all bariers upon which we were waiting
+	 * must also be complete and so we can discard the last used barriers
+	 * without loss of information.
+	 */
+	i915_syncmap_free(&tl->sync);
+}
+
 static u32 timeline_advance(struct intel_timeline *tl)
 {
 	GEM_BUG_ON(!tl->pin_count);
@@ -546,16 +527,9 @@ void intel_timeline_unpin(struct intel_timeline *tl)
 	if (--tl->pin_count)
 		return;
 
-	timeline_remove_from_active(tl);
+	GEM_BUG_ON(tl->active_count);
 	cacheline_release(tl->hwsp_cacheline);
 
-	/*
-	 * Since this timeline is idle, all bariers upon which we were waiting
-	 * must also be complete and so we can discard the last used barriers
-	 * without loss of information.
-	 */
-	i915_syncmap_free(&tl->sync);
-
 	__i915_vma_unpin(tl->hwsp_ggtt);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
index e08cebf64833..f583af1ba18d 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -77,9 +77,11 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
 }
 
 int intel_timeline_pin(struct intel_timeline *tl);
+void intel_timeline_enter(struct intel_timeline *tl);
 int intel_timeline_get_seqno(struct intel_timeline *tl,
 			     struct i915_request *rq,
 			     u32 *seqno);
+void intel_timeline_exit(struct intel_timeline *tl);
 void intel_timeline_unpin(struct intel_timeline *tl);
 
 int intel_timeline_read_hwsp(struct i915_request *from,
@@ -87,7 +89,6 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 			     u32 *hwsp_offset);
 
 void intel_timelines_init(struct drm_i915_private *i915);
-void intel_timelines_park(struct drm_i915_private *i915);
 void intel_timelines_fini(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 9a71aea7a338..b1a9f0c54bf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -25,7 +25,25 @@ struct intel_timeline {
 
 	struct mutex mutex; /* protects the flow of requests */
 
+	/*
+	 * pin_count and active_count track essentially the same thing:
+	 * How many requests are in flight or may be under construction.
+	 *
+	 * We need two distinct counters so that we can assign different
+	 * lifetimes to the events for different use-cases. For example,
+	 * we want to permanently keep the timeline pinned for the kernel
+	 * context so that we can issue requests at any time without having
+	 * to acquire space in the GGTT. However, we want to keep tracking
+	 * the activity (to be able to detect when we become idle) along that
+	 * permanently pinned timeline and so end up requiring two counters.
+	 *
+	 * Note that the active_count is protected by the intel_timeline.mutex,
+	 * but the pin_count is protected by a combination of serialisation
+	 * from the intel_context caller plus internal atomicity.
+	 */
 	unsigned int pin_count;
+	unsigned int active_count;
+
 	const u32 *hwsp_seqno;
 	struct i915_vma *hwsp_ggtt;
 	u32 hwsp_offset;
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index f0a840030382..d54113697745 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -816,8 +816,6 @@ static int live_hwsp_recycle(void *arg)
 
 			if (err)
 				goto out;
-
-			intel_timelines_park(i915); /* Encourage recycling! */
 		} while (!__igt_timeout(end_time, NULL));
 	}
 
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 06/18] drm/i915/gt: Convert timeline tracking to spinlock
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (3 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 05/18] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 07/18] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
                   ` (17 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Convert the list manipulation of active to use spinlocks so that we can
perform the updates from underneath a quick interrupt callback.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_reset.c    | 10 ++++++++--
 drivers/gpu/drm/i915/gt/intel_timeline.c | 12 +++++-------
 drivers/gpu/drm/i915/i915_gem.c          | 14 +++++++-------
 4 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 789102f4f46b..3b34b658de3f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -40,7 +40,7 @@ struct intel_gt {
 	struct intel_uc uc;
 
 	struct intel_gt_timelines {
-		struct mutex mutex; /* protects list */
+		spinlock_t lock; /* protects active_list */
 		struct list_head active_list;
 
 		/* Pack multiple timelines' seqnos into the same page */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ec85740de942..077716442c90 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -811,7 +811,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 	 *
 	 * No more can be submitted until we reset the wedged bit.
 	 */
-	mutex_lock(&timelines->mutex);
+	spin_lock(&timelines->lock);
 	list_for_each_entry(tl, &timelines->active_list, link) {
 		struct i915_request *rq;
 
@@ -819,6 +819,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		if (!rq)
 			continue;
 
+		spin_unlock(&timelines->lock);
+
 		/*
 		 * All internal dependencies (i915_requests) will have
 		 * been flushed by the set-wedge, but we may be stuck waiting
@@ -828,8 +830,12 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
 		 */
 		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
 		i915_request_put(rq);
+
+		/* Restart iteration after droping lock */
+		spin_lock(&timelines->lock);
+		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	mutex_unlock(&timelines->mutex);
+	spin_unlock(&timelines->lock);
 
 	intel_gt_sanitize(gt, false);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 4af0b9801d91..355dfc52c804 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -266,7 +266,7 @@ static void timelines_init(struct intel_gt *gt)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
 
-	mutex_init(&timelines->mutex);
+	spin_lock_init(&timelines->lock);
 	INIT_LIST_HEAD(&timelines->active_list);
 
 	spin_lock_init(&timelines->hwsp_lock);
@@ -345,9 +345,9 @@ void intel_timeline_enter(struct intel_timeline *tl)
 		return;
 	GEM_BUG_ON(!tl->active_count); /* overflow? */
 
-	mutex_lock(&timelines->mutex);
+	spin_lock(&timelines->lock);
 	list_add(&tl->link, &timelines->active_list);
-	mutex_unlock(&timelines->mutex);
+	spin_unlock(&timelines->lock);
 }
 
 void intel_timeline_exit(struct intel_timeline *tl)
@@ -358,9 +358,9 @@ void intel_timeline_exit(struct intel_timeline *tl)
 	if (--tl->active_count)
 		return;
 
-	mutex_lock(&timelines->mutex);
+	spin_lock(&timelines->lock);
 	list_del(&tl->link);
-	mutex_unlock(&timelines->mutex);
+	spin_unlock(&timelines->lock);
 
 	/*
 	 * Since this timeline is idle, all bariers upon which we were waiting
@@ -548,8 +548,6 @@ static void timelines_fini(struct intel_gt *gt)
 
 	GEM_BUG_ON(!list_empty(&timelines->active_list));
 	GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
-
-	mutex_destroy(&timelines->mutex);
 }
 
 void intel_timelines_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4752a3bf9636..29be25a7aade 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -897,18 +897,18 @@ static long
 wait_for_timelines(struct drm_i915_private *i915,
 		   unsigned int flags, long timeout)
 {
-	struct intel_gt_timelines *gt = &i915->gt.timelines;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
 	struct intel_timeline *tl;
 
-	mutex_lock(&gt->mutex);
-	list_for_each_entry(tl, &gt->active_list, link) {
+	spin_lock(&timelines->lock);
+	list_for_each_entry(tl, &timelines->active_list, link) {
 		struct i915_request *rq;
 
 		rq = i915_active_request_get_unlocked(&tl->last_request);
 		if (!rq)
 			continue;
 
-		mutex_unlock(&gt->mutex);
+		spin_unlock(&timelines->lock);
 
 		/*
 		 * "Race-to-idle".
@@ -928,10 +928,10 @@ wait_for_timelines(struct drm_i915_private *i915,
 			return timeout;
 
 		/* restart after reacquiring the lock */
-		mutex_lock(&gt->mutex);
-		tl = list_entry(&gt->active_list, typeof(*tl), link);
+		spin_lock(&timelines->lock);
+		tl = list_entry(&timelines->active_list, typeof(*tl), link);
 	}
-	mutex_unlock(&gt->mutex);
+	spin_unlock(&timelines->lock);
 
 	return timeout;
 }
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 07/18] drm/i915/gt: Guard timeline pinning with its own mutex
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (4 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 06/18] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 08/18] drm/i915: Protect request retirement with timeline->mutex Chris Wilson
                   ` (16 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

In preparation for removing struct_mutex from around context retirement,
we need to make timeline pinning safe. Since multiple engines/contexts
can share a single timeline, it needs to be protected by a mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 27 +++++++++----------
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  2 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |  6 ++---
 3 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 355dfc52c804..7b476cd55dac 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -211,9 +211,9 @@ int intel_timeline_init(struct intel_timeline *timeline,
 	void *vaddr;
 
 	kref_init(&timeline->kref);
+	atomic_set(&timeline->pin_count, 0);
 
 	timeline->gt = gt;
-	timeline->pin_count = 0;
 
 	timeline->has_initial_breadcrumb = !hwsp;
 	timeline->hwsp_cacheline = NULL;
@@ -280,7 +280,7 @@ void intel_timelines_init(struct drm_i915_private *i915)
 
 void intel_timeline_fini(struct intel_timeline *timeline)
 {
-	GEM_BUG_ON(timeline->pin_count);
+	GEM_BUG_ON(atomic_read(&timeline->pin_count));
 	GEM_BUG_ON(!list_empty(&timeline->requests));
 
 	if (timeline->hwsp_cacheline)
@@ -314,33 +314,31 @@ int intel_timeline_pin(struct intel_timeline *tl)
 {
 	int err;
 
-	if (tl->pin_count++)
+	if (atomic_add_unless(&tl->pin_count, 1, 0))
 		return 0;
-	GEM_BUG_ON(!tl->pin_count);
-	GEM_BUG_ON(tl->active_count);
 
 	err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto unpin;
+		return err;
 
 	tl->hwsp_offset =
 		i915_ggtt_offset(tl->hwsp_ggtt) +
 		offset_in_page(tl->hwsp_offset);
 
 	cacheline_acquire(tl->hwsp_cacheline);
+	if (atomic_fetch_inc(&tl->pin_count)) {
+		cacheline_release(tl->hwsp_cacheline);
+		__i915_vma_unpin(tl->hwsp_ggtt);
+	}
 
 	return 0;
-
-unpin:
-	tl->pin_count = 0;
-	return err;
 }
 
 void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
-	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	if (tl->active_count++)
 		return;
 	GEM_BUG_ON(!tl->active_count); /* overflow? */
@@ -372,7 +370,7 @@ void intel_timeline_exit(struct intel_timeline *tl)
 
 static u32 timeline_advance(struct intel_timeline *tl)
 {
-	GEM_BUG_ON(!tl->pin_count);
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
 
 	return tl->seqno += 1 + tl->has_initial_breadcrumb;
@@ -523,11 +521,10 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 
 void intel_timeline_unpin(struct intel_timeline *tl)
 {
-	GEM_BUG_ON(!tl->pin_count);
-	if (--tl->pin_count)
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
+	if (!atomic_dec_and_test(&tl->pin_count))
 		return;
 
-	GEM_BUG_ON(tl->active_count);
 	cacheline_release(tl->hwsp_cacheline);
 
 	__i915_vma_unpin(tl->hwsp_ggtt);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index b1a9f0c54bf0..2b1baf2fcc8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -41,7 +41,7 @@ struct intel_timeline {
 	 * but the pin_count is protected by a combination of serialisation
 	 * from the intel_context caller plus internal atomicity.
 	 */
-	unsigned int pin_count;
+	atomic_t pin_count;
 	unsigned int active_count;
 
 	const u32 *hwsp_seqno;
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index a63dd8a42cd4..54a11dde3076 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -34,13 +34,13 @@
 
 static void mock_timeline_pin(struct intel_timeline *tl)
 {
-	tl->pin_count++;
+	atomic_inc(&tl->pin_count);
 }
 
 static void mock_timeline_unpin(struct intel_timeline *tl)
 {
-	GEM_BUG_ON(!tl->pin_count);
-	tl->pin_count--;
+	GEM_BUG_ON(!atomic_read(&tl->pin_count));
+	atomic_dec(&tl->pin_count);
 }
 
 static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 08/18] drm/i915: Protect request retirement with timeline->mutex
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (5 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 07/18] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 09/18] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
                   ` (15 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Forgo the struct_mutex requirement for request retirement as we have
been transitioning over to only using the timeline->mutex for
controlling the lifetime of a request on that timeline.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 183 ++++++++++--------
 drivers/gpu/drm/i915/gt/intel_context.h       |  18 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   1 -
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   3 -
 drivers/gpu/drm/i915/gt/intel_gt.c            |   2 -
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   2 -
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |  19 +-
 drivers/gpu/drm/i915/gt/mock_engine.c         |   1 -
 drivers/gpu/drm/i915/gt/selftest_context.c    |   9 +-
 drivers/gpu/drm/i915/i915_request.c           | 156 +++++++--------
 drivers/gpu/drm/i915/i915_request.h           |   3 -
 12 files changed, 209 insertions(+), 189 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 533db2b1fae9..b8432c3437e9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -735,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
-{
-	struct i915_request *rq;
-
-	/*
-	 * Completely unscientific finger-in-the-air estimates for suitable
-	 * maximum user request size (to avoid blocking) and then backoff.
-	 */
-	if (intel_ring_update_space(ring) >= PAGE_SIZE)
-		return NULL;
-
-	/*
-	 * Find a request that after waiting upon, there will be at least half
-	 * the ring available. The hysteresis allows us to compete for the
-	 * shared ring and should mean that we sleep less often prior to
-	 * claiming our resources, but not so long that the ring completely
-	 * drains before we can submit our next request.
-	 */
-	list_for_each_entry(rq, &ring->request_list, ring_link) {
-		if (__intel_ring_space(rq->postfix,
-				       ring->emit, ring->size) > ring->size / 2)
-			break;
-	}
-	if (&rq->ring_link == &ring->request_list)
-		return NULL; /* weird, we will check again later for real */
-
-	return i915_request_get(rq);
-}
-
-static int eb_wait_for_ring(const struct i915_execbuffer *eb)
-{
-	struct i915_request *rq;
-	int ret = 0;
-
-	/*
-	 * Apply a light amount of backpressure to prevent excessive hogs
-	 * from blocking waiting for space whilst holding struct_mutex and
-	 * keeping all of their resources pinned.
-	 */
-
-	rq = __eb_wait_for_ring(eb->context->ring);
-	if (rq) {
-		mutex_unlock(&eb->i915->drm.struct_mutex);
-
-		if (i915_request_wait(rq,
-				      I915_WAIT_INTERRUPTIBLE,
-				      MAX_SCHEDULE_TIMEOUT) < 0)
-			ret = -EINTR;
-
-		i915_request_put(rq);
-
-		mutex_lock(&eb->i915->drm.struct_mutex);
-	}
-
-	return ret;
-}
-
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
@@ -2132,10 +2075,75 @@ static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+static struct i915_request *eb_throttle(struct intel_context *ce)
+{
+	struct intel_ring *ring = ce->ring;
+	struct intel_timeline *tl = ce->timeline;
+	struct i915_request *rq;
+
+	/*
+	 * Completely unscientific finger-in-the-air estimates for suitable
+	 * maximum user request size (to avoid blocking) and then backoff.
+	 */
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that after waiting upon, there will be at least half
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &tl->requests, link) {
+		if (rq->ring != ring)
+			continue;
+
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->link == &tl->requests)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int
+__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
 	int err;
 
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+	if (err)
+		return err;
+
+	err = __intel_context_do_pin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	return err;
+}
+
+static void
+__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	mutex_lock(&eb->i915->drm.struct_mutex);
+	intel_context_unpin(ce);
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+}
+
+static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	struct intel_timeline *tl;
+	struct i915_request *rq;
+	int err;
+
 	/*
 	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 	 * EIO if the GPU is already wedged.
@@ -2149,7 +2157,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	err = intel_context_pin(ce);
+	err = __eb_pin_context(eb, ce);
 	if (err)
 		return err;
 
@@ -2161,23 +2169,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 	 * until the timeline is idle, which in turn releases the wakeref
 	 * taken on the engine, and the parent device.
 	 */
-	err = intel_context_timeline_lock(ce);
-	if (err)
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl)) {
+		err = PTR_ERR(tl);
 		goto err_unpin;
+	}
 
 	intel_context_enter(ce);
-	intel_context_timeline_unlock(ce);
+	rq = eb_throttle(ce);
+
+	intel_context_timeline_unlock(tl);
+
+	if (rq) {
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0) {
+			i915_request_put(rq);
+			err = -EINTR;
+			goto err_exit;
+		}
+
+		i915_request_put(rq);
+	}
 
 	eb->engine = ce->engine;
 	eb->context = ce;
 	return 0;
 
+err_exit:
+	mutex_lock(&tl->mutex);
+	intel_context_exit(ce);
+	intel_context_timeline_unlock(tl);
 err_unpin:
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 	return err;
 }
 
-static void eb_unpin_context(struct i915_execbuffer *eb)
+static void eb_unpin_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce = eb->context;
 	struct intel_timeline *tl = ce->timeline;
@@ -2186,7 +2214,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
 	intel_context_exit(ce);
 	mutex_unlock(&tl->mutex);
 
-	intel_context_unpin(ce);
+	__eb_unpin_context(eb, ce);
 }
 
 static unsigned int
@@ -2231,9 +2259,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb,
 }
 
 static int
-eb_select_engine(struct i915_execbuffer *eb,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+eb_pin_engine(struct i915_execbuffer *eb,
+	      struct drm_file *file,
+	      struct drm_i915_gem_execbuffer2 *args)
 {
 	struct intel_context *ce;
 	unsigned int idx;
@@ -2248,7 +2276,7 @@ eb_select_engine(struct i915_execbuffer *eb,
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
-	err = eb_pin_context(eb, ce);
+	err = __eb_pin_engine(eb, ce);
 	intel_context_put(ce);
 
 	return err;
@@ -2466,16 +2494,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto err_context;
-
-	err = eb_select_engine(&eb, file, args);
+	err = eb_pin_engine(&eb, file, args);
 	if (unlikely(err))
-		goto err_unlock;
+		goto err_context;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
-	if (unlikely(err))
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
 		goto err_engine;
 
 	err = eb_relocate(&eb);
@@ -2633,10 +2657,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
-err_engine:
-	eb_unpin_context(&eb);
-err_unlock:
 	mutex_unlock(&dev->struct_mutex);
+err_engine:
+	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 9fa8b588f18e..053a1307ecb4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -12,6 +12,7 @@
 #include "i915_active.h"
 #include "intel_context_types.h"
 #include "intel_engine_types.h"
+#include "intel_timeline_types.h"
 
 void intel_context_init(struct intel_context *ce,
 			struct i915_gem_context *ctx,
@@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce)
 	kref_put(&ce->ref, ce->ops->destroy);
 }
 
-static inline int __must_check
+static inline struct intel_timeline *__must_check
 intel_context_timeline_lock(struct intel_context *ce)
 	__acquires(&ce->timeline->mutex)
 {
-	return mutex_lock_interruptible(&ce->timeline->mutex);
+	struct intel_timeline *tl = ce->timeline;
+	int err;
+
+	err = mutex_lock_interruptible(&tl->mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	return tl;
 }
 
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
-	__releases(&ce->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+	__releases(&tl->mutex)
 {
-	mutex_unlock(&ce->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 int intel_context_prepare_remote_request(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 13a569907c3d..8a9d3cd2c31c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -679,7 +679,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 				engine->status_page.vma))
 		goto out_frame;
 
-	INIT_LIST_HEAD(&frame->ring.request_list);
 	frame->ring.vaddr = frame->cs;
 	frame->ring.size = sizeof(frame->cs);
 	frame->ring.effective_size = frame->ring.size;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a0f372807dd4..9965a32601d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -69,9 +69,6 @@ struct intel_ring {
 	struct i915_vma *vma;
 	void *vaddr;
 
-	struct list_head request_list;
-	struct list_head active_link;
-
 	/*
 	 * As we have two types of rings, one global to the engine used
 	 * by ringbuffer submission and those that are exclusive to a
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index c543467a8a1c..b5277632137c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -13,9 +13,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 	gt->i915 = i915;
 	gt->uncore = &i915->uncore;
 
-	INIT_LIST_HEAD(&gt->active_rings);
 	INIT_LIST_HEAD(&gt->closed_vma);
-
 	spin_lock_init(&gt->closed_lock);
 
 	intel_gt_init_hangcheck(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 3b34b658de3f..8da7b9f1f46e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -48,8 +48,6 @@ struct intel_gt {
 		struct list_head hwsp_free_list;
 	} timelines;
 
-	struct list_head active_rings;
-
 	struct intel_wakeref wakeref;
 
 	struct list_head closed_vma;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b5618e6c1361..71c69381a1aa 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1619,6 +1619,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 {
 	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
+	intel_ring_reset(ce->ring, ce->ring->tail);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index be170b10d92f..8ce5a55427c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1248,7 +1248,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 		return;
 
 	/* Discard any unused bytes beyond that submitted to hw. */
-	intel_ring_reset(ring, ring->tail);
+	intel_ring_reset(ring, ring->emit);
 
 	i915_vma_unset_ggtt_write(vma);
 	if (i915_vma_is_map_and_fenceable(vma))
@@ -1309,7 +1309,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ring->ref);
-	INIT_LIST_HEAD(&ring->request_list);
 
 	ring->size = size;
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1863,7 +1862,10 @@ static int ring_request_alloc(struct i915_request *request)
 	return 0;
 }
 
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
+static noinline int
+wait_for_space(struct intel_ring *ring,
+	       struct intel_timeline *tl,
+	       unsigned int bytes)
 {
 	struct i915_request *target;
 	long timeout;
@@ -1871,15 +1873,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
-	GEM_BUG_ON(list_empty(&ring->request_list));
-	list_for_each_entry(target, &ring->request_list, ring_link) {
+	GEM_BUG_ON(list_empty(&tl->requests));
+	list_for_each_entry(target, &tl->requests, link) {
+		if (target->ring != ring)
+			continue;
+
 		/* Would completion of this request free enough space? */
 		if (bytes <= __intel_ring_space(target->postfix,
 						ring->emit, ring->size))
 			break;
 	}
 
-	if (WARN_ON(&target->ring_link == &ring->request_list))
+	if (GEM_WARN_ON(&target->link == &tl->requests))
 		return -ENOSPC;
 
 	timeout = i915_request_wait(target,
@@ -1946,7 +1951,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 		 */
 		GEM_BUG_ON(!rq->reserved_space);
 
-		ret = wait_for_space(ring, total_bytes);
+		ret = wait_for_space(ring, rq->timeline, total_bytes);
 		if (unlikely(ret))
 			return ERR_PTR(ret);
 	}
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 54a11dde3076..5d43cbc3f345 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -58,7 +58,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->vaddr = (void *)(ring + 1);
 	atomic_set(&ring->pin_count, 1);
 
-	INIT_LIST_HEAD(&ring->request_list);
 	intel_ring_update_space(ring);
 
 	return ring;
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index da9c49e2adaf..6fbc72bc290e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -20,10 +20,13 @@ static int request_sync(struct i915_request *rq)
 
 	i915_request_add(rq);
 	timeout = i915_request_wait(rq, 0, HZ / 10);
-	if (timeout < 0)
+	if (timeout < 0) {
 		err = timeout;
-	else
+	} else {
+		mutex_lock(&rq->timeline->mutex);
 		i915_request_retire_upto(rq);
+		mutex_unlock(&rq->timeline->mutex);
+	}
 
 	i915_request_put(rq);
 
@@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce)
 	struct intel_timeline *tl = ce->timeline;
 	int err = 0;
 
+	mutex_lock(&tl->mutex);
 	do {
 		struct i915_request *rq;
 		long timeout;
@@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce)
 
 		i915_request_put(rq);
 	} while (!err);
+	mutex_unlock(&tl->mutex);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 4703aab3ae21..74b611ab60a2 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -181,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void advance_ring(struct i915_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	unsigned int tail;
-
-	/*
-	 * We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
-	if (list_is_last(&request->ring_link, &ring->request_list)) {
-		/*
-		 * We may race here with execlists resubmitting this request
-		 * as we retire it. The resubmission will move the ring->tail
-		 * forwards (to request->wa_tail). We either read the
-		 * current value that was written to hw, or the value that
-		 * is just about to be. Either works, if we miss the last two
-		 * noops - they are safe to be replayed on a reset.
-		 */
-		tail = READ_ONCE(request->tail);
-		list_del(&ring->active_link);
-	} else {
-		tail = request->postfix;
-	}
-	list_del_init(&request->ring_link);
-
-	ring->head = tail;
-}
-
 static void free_capture_list(struct i915_request *request)
 {
 	struct i915_capture_list *capture;
@@ -232,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -244,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
-	advance_ring(rq);
+	/*
+	 * We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	rq->ring->head = rq->postfix;
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -321,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq)
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_ring *ring = rq->ring;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *tmp;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -329,15 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 		  rq->fence.context, rq->fence.seqno,
 		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
-	if (list_empty(&rq->ring_link))
-		return;
-
 	do {
-		tmp = list_first_entry(&ring->request_list,
-				       typeof(*tmp), ring_link);
+		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
@@ -564,29 +536,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	return NOTIFY_DONE;
 }
 
-static void ring_retire_requests(struct intel_ring *ring)
+static void retire_requests(struct intel_timeline *tl)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+	list_for_each_entry_safe(rq, rn, &tl->requests, link)
 		if (!i915_request_retire(rq))
 			break;
 }
 
 static noinline struct i915_request *
-request_alloc_slow(struct intel_context *ce, gfp_t gfp)
+request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 {
-	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
 
-	if (list_empty(&ring->request_list))
+	if (list_empty(&tl->requests))
 		goto out;
 
 	if (!gfpflags_allow_blocking(gfp))
 		goto out;
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
 	i915_request_retire(rq);
 
 	rq = kmem_cache_alloc(global.slab_requests,
@@ -595,11 +566,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 		return rq;
 
 	/* Ratelimit ourselves to prevent oom from malicious clients */
-	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
+	rq = list_last_entry(&tl->requests, typeof(*rq), link);
 	cond_synchronize_rcu(rq->rcustate);
 
 	/* Retire our old requests in the hope that we free some */
-	ring_retire_requests(ring);
+	retire_requests(tl);
 
 out:
 	return kmem_cache_alloc(global.slab_requests, gfp);
@@ -650,7 +621,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq = kmem_cache_alloc(global.slab_requests,
 			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = request_alloc_slow(ce, gfp);
+		rq = request_alloc_slow(tl, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
@@ -742,15 +713,15 @@ struct i915_request *
 i915_request_create(struct intel_context *ce)
 {
 	struct i915_request *rq;
-	int err;
+	struct intel_timeline *tl;
 
-	err = intel_context_timeline_lock(ce);
-	if (err)
-		return ERR_PTR(err);
+	tl = intel_context_timeline_lock(ce);
+	if (IS_ERR(tl))
+		return ERR_CAST(tl);
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+	rq = list_first_entry(&tl->requests, typeof(*rq), link);
+	if (!list_is_last(&rq->link, &tl->requests))
 		i915_request_retire(rq);
 
 	intel_context_enter(ce);
@@ -760,22 +731,22 @@ i915_request_create(struct intel_context *ce)
 		goto err_unlock;
 
 	/* Check that we do not interrupt ourselves with a new request */
-	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
+	rq->cookie = lockdep_pin_lock(&tl->mutex);
 
 	return rq;
 
 err_unlock:
-	intel_context_timeline_unlock(ce);
+	intel_context_timeline_unlock(tl);
 	return rq;
 }
 
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+	if (list_is_first(&signal->link, &signal->timeline->requests))
 		return 0;
 
-	signal = list_prev_entry(signal, ring_link);
+	signal = list_prev_entry(signal, link);
 	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
 		return 0;
 
@@ -1155,7 +1126,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct intel_ring *ring = rq->ring;
-	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
@@ -1168,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	 */
 	GEM_BUG_ON(rq->reserved_space > ring->space);
 	rq->reserved_space = 0;
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1179,14 +1150,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	GEM_BUG_ON(IS_ERR(cs));
 	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(rq);
-
-	list_add_tail(&rq->ring_link, &ring->request_list);
-	if (list_is_first(&rq->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &rq->i915->gt.active_rings);
-	rq->emitted_jiffies = jiffies;
-
-	return prev;
+	return __i915_request_add_to_timeline(rq);
 }
 
 void __i915_request_queue(struct i915_request *rq,
@@ -1214,10 +1178,11 @@ void __i915_request_queue(struct i915_request *rq,
 void i915_request_add(struct i915_request *rq)
 {
 	struct i915_sched_attr attr = rq->gem_context->sched;
+	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *prev;
 
-	lockdep_assert_held(&rq->timeline->mutex);
-	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+	lockdep_assert_held(&tl->mutex);
+	lockdep_unpin_lock(&tl->mutex, rq->cookie);
 
 	trace_i915_request_add(rq);
 
@@ -1266,10 +1231,10 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev))
+	if (prev && i915_request_completed(prev) && prev->timeline == tl)
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&rq->timeline->mutex);
+	mutex_unlock(&tl->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1489,18 +1454,43 @@ long i915_request_wait(struct i915_request *rq,
 
 bool i915_retire_requests(struct drm_i915_private *i915)
 {
-	struct intel_ring *ring, *tmp;
+	struct intel_gt_timelines *timelines = &i915->gt.timelines;
+	struct intel_timeline *tl, *tn;
+	LIST_HEAD(free);
+
+	spin_lock(&timelines->lock);
+	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+		if (!mutex_trylock(&tl->mutex))
+			continue;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+		intel_timeline_get(tl);
+		GEM_BUG_ON(!tl->active_count);
+		tl->active_count++; /* pin the list element */
+		spin_unlock(&timelines->lock);
 
-	list_for_each_entry_safe(ring, tmp,
-				 &i915->gt.active_rings, active_link) {
-		intel_ring_get(ring); /* last rq holds reference! */
-		ring_retire_requests(ring);
-		intel_ring_put(ring);
+		retire_requests(tl);
+
+		spin_lock(&timelines->lock);
+
+		/* Restart iteration after dropping lock */
+		list_safe_reset_next(tl, tn, link);
+		if (!--tl->active_count)
+			list_del(&tl->link);
+
+		mutex_unlock(&tl->mutex);
+
+		/* Defer the final release to after the spinlock */
+		if (refcount_dec_and_test(&tl->kref.refcount)) {
+			GEM_BUG_ON(tl->active_count);
+			list_add(&tl->link, &free);
+		}
 	}
+	spin_unlock(&timelines->lock);
+
+	list_for_each_entry_safe(tl, tn, &free, link)
+		__intel_timeline_free(&tl->kref);
 
-	return !list_empty(&i915->gt.active_rings);
+	return !list_empty(&timelines->active_list);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index fec1d5f17c94..8ac6e1226a56 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -223,9 +223,6 @@ struct i915_request {
 	/** timeline->request entry for this request */
 	struct list_head link;
 
-	/** ring->request_list entry for this request */
-	struct list_head ring_link;
-
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_link;
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 09/18] drm/i915/gt: Mark context->active_count as protected by timeline->mutex
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (6 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 08/18] drm/i915: Protect request retirement with timeline->mutex Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 10/18] drm/i915: Forgo last_fence active request tracking Chris Wilson
                   ` (14 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

We use timeline->mutex to protect modifications to
context->active_count, and the associated enable/disable callbacks.
Due to complications with engine-pm barrier there is a path where we used
a "superlock" to provide serialised protect and so could not
unconditionally assert with lockdep that it was always held. However,
we can mark the mutex as taken (noting that we may be nested underneath
ourselves) which means we can be reassured the right timeline->mutex is
always treated as held and let lockdep roam free.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 +++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 14 ++++++++++++++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  4 ++++
 drivers/gpu/drm/i915/i915_request.c           |  3 ++-
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 053a1307ecb4..dd742ac2fbdb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -89,17 +89,20 @@ void intel_context_exit_engine(struct intel_context *ce);
 
 static inline void intel_context_enter(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->timeline->mutex);
 	if (!ce->active_count++)
 		ce->ops->enter(ce);
 }
 
 static inline void intel_context_mark_active(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->timeline->mutex);
 	++ce->active_count;
 }
 
 static inline void intel_context_exit(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->timeline->mutex);
 	GEM_BUG_ON(!ce->active_count);
 	if (!--ce->active_count)
 		ce->ops->exit(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d8ce266c049f..bf9cedfccbf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -59,7 +59,7 @@ struct intel_context {
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
 
-	unsigned int active_count; /* notionally protected by timeline->mutex */
+	unsigned int active_count; /* protected by timeline->mutex */
 
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index d6a00a04ed6d..f4358f75193e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -37,6 +37,16 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	return 0;
 }
 
+static inline void __timeline_mark_lock(struct intel_context *ce)
+{
+	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+}
+
+static inline void __timeline_mark_unlock(struct intel_context *ce)
+{
+	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+}
+
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
@@ -61,6 +71,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	 * retiring the last request, thus all rings should be empty and
 	 * all timelines idle.
 	 */
+	__timeline_mark_lock(engine->kernel_context);
+
 	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		/* Context switch failed, hope for the best! Maybe reset? */
@@ -78,6 +90,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	__intel_wakeref_defer_park(&engine->wakeref);
 	__i915_request_queue(rq, NULL);
 
+	__timeline_mark_unlock(engine->kernel_context);
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 7b476cd55dac..eafd94d5e211 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -338,6 +338,8 @@ void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
+	lockdep_assert_held(&tl->mutex);
+
 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	if (tl->active_count++)
 		return;
@@ -352,6 +354,8 @@ void intel_timeline_exit(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
+	lockdep_assert_held(&tl->mutex);
+
 	GEM_BUG_ON(!tl->active_count);
 	if (--tl->active_count)
 		return;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 74b611ab60a2..7170ccb3c677 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1087,7 +1087,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request, 1);
+	prev = rcu_dereference_protected(timeline->last_request.request,
+					 lockdep_is_held(&timeline->mutex));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 10/18] drm/i915: Forgo last_fence active request tracking
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (7 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 09/18] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
                   ` (13 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

We were using the last_fence to track the last request that used this
vma that might be interpreted by a fence register and forced ourselves
to wait for this request before modifying any fence register that
overlapped our vma. Due to requirement that we need to track any XY_BLT
command, linear or tiled, this in effect meant that we have to track the
vma for its active lifespan anyway, so we can forgo the explicit
last_fence tracking and just use the whole vma->active.

Another solution would be to pipeline the register updates, and would
help resolve some long running stalls for gen3 (but only gen 2 and 3!)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c       |  4 +---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c |  6 ++----
 drivers/gpu/drm/i915/i915_gem_gtt.c       |  1 -
 drivers/gpu/drm/i915/i915_vma.c           | 13 -------------
 drivers/gpu/drm/i915/i915_vma.h           |  1 -
 5 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b616ba0e0da0..2c640987c24d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -212,9 +212,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 			}
 		}
 		if (vma->fence)
-			seq_printf(m, " , fence: %d%s",
-				   vma->fence->id,
-				   i915_active_request_isset(&vma->last_fence) ? "*" : "");
+			seq_printf(m, " , fence: %d", vma->fence->id);
 		seq_puts(m, ")");
 
 		spin_lock(&obj->vma.lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index bcac359ec661..c9654f1a468f 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -230,16 +230,14 @@ static int fence_update(struct i915_fence_reg *fence,
 			 i915_gem_object_get_tiling(vma->obj)))
 			return -EINVAL;
 
-		ret = i915_active_request_retire(&vma->last_fence,
-					     &vma->obj->base.dev->struct_mutex);
+		ret = i915_active_wait(&vma->active);
 		if (ret)
 			return ret;
 	}
 
 	old = xchg(&fence->vma, NULL);
 	if (old) {
-		ret = i915_active_request_retire(&old->last_fence,
-					     &old->obj->base.dev->struct_mutex);
+		ret = i915_active_wait(&old->active);
 		if (ret) {
 			fence->vma = old;
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 72a227c43e35..e07c1ae971d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1867,7 +1867,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 		return ERR_PTR(-ENOMEM);
 
 	i915_active_init(i915, &vma->active, NULL, NULL);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	vma->vm = &ggtt->vm;
 	vma->ops = &pd_vma_ops;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4183b0e10324..8be1bbef40e5 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -120,7 +120,6 @@ vma_create(struct drm_i915_gem_object *obj,
 
 	i915_active_init(vm->i915, &vma->active,
 			 __i915_vma_active, __i915_vma_retire);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
 
 	/* Declare ourselves safe for use inside shrinkers */
 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -802,8 +801,6 @@ static void __i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(vma->node.allocated);
 	GEM_BUG_ON(vma->fence);
 
-	GEM_BUG_ON(i915_active_request_isset(&vma->last_fence));
-
 	mutex_lock(&vma->vm->mutex);
 	list_del(&vma->vm_link);
 	mutex_unlock(&vma->vm->mutex);
@@ -928,9 +925,6 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	obj->read_domains |= I915_GEM_GPU_DOMAINS;
 	obj->mm.dirty = true;
 
-	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-		__i915_active_request_set(&vma->last_fence, rq);
-
 	GEM_BUG_ON(!i915_vma_is_active(vma));
 	return 0;
 }
@@ -961,14 +955,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * before we are finished).
 		 */
 		__i915_vma_pin(vma);
-
 		ret = i915_active_wait(&vma->active);
-		if (ret)
-			goto unpin;
-
-		ret = i915_active_request_retire(&vma->last_fence,
-					      &vma->vm->i915->drm.struct_mutex);
-unpin:
 		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5c4224749bde..b3d2121be947 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -111,7 +111,6 @@ struct i915_vma {
 #define I915_VMA_GGTT_WRITE	BIT(14)
 
 	struct i915_active active;
-	struct i915_active_request last_fence;
 
 	/**
 	 * Support different GGTT views into the same object.
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (8 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 10/18] drm/i915: Forgo last_fence active request tracking Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 15:38   ` Matthew Auld
  2019-08-12 13:39 ` [PATCH 12/18] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
                   ` (12 subsequent siblings)
  22 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Remove the raw i915_active_request tracking in favour of the higher
level i915_active tracking for the sole purpose of making the lockless
transition easier in later patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_overlay.c | 129 +++++++++----------
 drivers/gpu/drm/i915/i915_active.h           |  19 ---
 2 files changed, 64 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 4d3b2086570e..4f78586ee05e 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -191,7 +191,8 @@ struct intel_overlay {
 	struct overlay_registers __iomem *regs;
 	u32 flip_addr;
 	/* flip handling */
-	struct i915_active_request last_flip;
+	struct i915_active last_flip;
+	void (*flip_complete)(struct intel_overlay *ovl);
 };
 
 static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
@@ -217,30 +218,25 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
 				  PCI_DEVFN(0, 0), I830_CLOCK_GATE, val);
 }
 
-static void intel_overlay_submit_request(struct intel_overlay *overlay,
-					 struct i915_request *rq,
-					 i915_active_retire_fn retire)
+static struct i915_request *
+alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 {
-	GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip,
-					    &overlay->i915->drm.struct_mutex));
-	i915_active_request_set_retire_fn(&overlay->last_flip, retire,
-					  &overlay->i915->drm.struct_mutex);
-	__i915_active_request_set(&overlay->last_flip, rq);
-	i915_request_add(rq);
-}
+	struct i915_request *rq;
+	int err;
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 struct i915_request *rq,
-					 i915_active_retire_fn retire)
-{
-	intel_overlay_submit_request(overlay, rq, retire);
-	return i915_active_request_retire(&overlay->last_flip,
-					  &overlay->i915->drm.struct_mutex);
-}
+	overlay->flip_complete = fn;
 
-static struct i915_request *alloc_request(struct intel_overlay *overlay)
-{
-	return i915_request_create(overlay->context);
+	rq = i915_request_create(overlay->context);
+	if (IS_ERR(rq))
+		return rq;
+
+	err = i915_active_ref(&overlay->last_flip, rq->fence.context, rq);
+	if (err) {
+		i915_request_add(rq);
+		return ERR_PTR(err);
+	}
+
+	return rq;
 }
 
 /* overlay needs to be disable in OCMD reg */
@@ -252,7 +248,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 
 	WARN_ON(overlay->active);
 
-	rq = alloc_request(overlay);
+	rq = alloc_request(overlay, NULL);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -273,7 +269,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	*cs++ = MI_NOOP;
 	intel_ring_advance(rq, cs);
 
-	return intel_overlay_do_wait_request(overlay, rq, NULL);
+	i915_request_add(rq);
+
+	return i915_active_wait(&overlay->last_flip);
 }
 
 static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
@@ -317,7 +315,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	rq = alloc_request(overlay);
+	rq = alloc_request(overlay, NULL);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -332,8 +330,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	intel_ring_advance(rq, cs);
 
 	intel_overlay_flip_prepare(overlay, vma);
-
-	intel_overlay_submit_request(overlay, rq, NULL);
+	i915_request_add(rq);
 
 	return 0;
 }
@@ -354,20 +351,13 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
 }
 
 static void
-intel_overlay_release_old_vid_tail(struct i915_active_request *active,
-				   struct i915_request *rq)
+intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
-	struct intel_overlay *overlay =
-		container_of(active, typeof(*overlay), last_flip);
-
 	intel_overlay_release_old_vma(overlay);
 }
 
-static void intel_overlay_off_tail(struct i915_active_request *active,
-				   struct i915_request *rq)
+static void intel_overlay_off_tail(struct intel_overlay *overlay)
 {
-	struct intel_overlay *overlay =
-		container_of(active, typeof(*overlay), last_flip);
 	struct drm_i915_private *dev_priv = overlay->i915;
 
 	intel_overlay_release_old_vma(overlay);
@@ -380,6 +370,16 @@ static void intel_overlay_off_tail(struct i915_active_request *active,
 		i830_overlay_clock_gating(dev_priv, true);
 }
 
+static void
+intel_overlay_last_flip_retire(struct i915_active *active)
+{
+	struct intel_overlay *overlay =
+		container_of(active, typeof(*overlay), last_flip);
+
+	if (overlay->flip_complete)
+		overlay->flip_complete(overlay);
+}
+
 /* overlay needs to be disabled in OCMD reg */
 static int intel_overlay_off(struct intel_overlay *overlay)
 {
@@ -394,7 +394,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	 * of the hw. Do it in both cases */
 	flip_addr |= OFC_UPDATE;
 
-	rq = alloc_request(overlay);
+	rq = alloc_request(overlay, intel_overlay_off_tail);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -417,17 +417,16 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	intel_ring_advance(rq, cs);
 
 	intel_overlay_flip_prepare(overlay, NULL);
+	i915_request_add(rq);
 
-	return intel_overlay_do_wait_request(overlay, rq,
-					     intel_overlay_off_tail);
+	return i915_active_wait(&overlay->last_flip);
 }
 
 /* recover from an interruption due to a signal
  * We have to be careful not to repeat work forever an make forward progess. */
 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	return i915_active_request_retire(&overlay->last_flip,
-					  &overlay->i915->drm.struct_mutex);
+	return i915_active_wait(&overlay->last_flip);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -437,43 +436,40 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
 	struct drm_i915_private *dev_priv = overlay->i915;
+	struct i915_request *rq;
 	u32 *cs;
-	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	/* Only wait if there is actually an old frame to release to
+	/*
+	 * Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
 	 */
 	if (!overlay->old_vma)
 		return 0;
 
-	if (I915_READ(GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
-		/* synchronous slowpath */
-		struct i915_request *rq;
+	if (!(I915_READ(GEN2_ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT)) {
+		intel_overlay_release_old_vid_tail(overlay);
+		return 0;
+	}
 
-		rq = alloc_request(overlay);
-		if (IS_ERR(rq))
-			return PTR_ERR(rq);
+	rq = alloc_request(overlay, intel_overlay_release_old_vid_tail);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
 
-		cs = intel_ring_begin(rq, 2);
-		if (IS_ERR(cs)) {
-			i915_request_add(rq);
-			return PTR_ERR(cs);
-		}
+	cs = intel_ring_begin(rq, 2);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
 
-		*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
-		*cs++ = MI_NOOP;
-		intel_ring_advance(rq, cs);
+	*cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
 
-		ret = intel_overlay_do_wait_request(overlay, rq,
-						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	} else
-		intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL);
+	i915_request_add(rq);
 
-	return 0;
+	return i915_active_wait(&overlay->last_flip);
 }
 
 void intel_overlay_reset(struct drm_i915_private *dev_priv)
@@ -1375,7 +1371,9 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	overlay->contrast = 75;
 	overlay->saturation = 146;
 
-	INIT_ACTIVE_REQUEST(&overlay->last_flip);
+	i915_active_init(dev_priv,
+			 &overlay->last_flip,
+			 NULL, intel_overlay_last_flip_retire);
 
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
 	if (ret)
@@ -1409,6 +1407,7 @@ void intel_overlay_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(overlay->active);
 
 	i915_gem_object_put(overlay->reg_bo);
+	i915_active_fini(&overlay->last_flip);
 
 	kfree(overlay);
 }
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 566336c99ed7..f6d730cf2fe6 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -89,25 +89,6 @@ int __must_check
 i915_active_request_set(struct i915_active_request *active,
 			struct i915_request *rq);
 
-/**
- * i915_active_request_set_retire_fn - updates the retirement callback
- * @active - the active tracker
- * @fn - the routine called when the request is retired
- * @mutex - struct_mutex used to guard retirements
- *
- * i915_active_request_set_retire_fn() updates the function pointer that
- * is called when the final request associated with the @active tracker
- * is retired.
- */
-static inline void
-i915_active_request_set_retire_fn(struct i915_active_request *active,
-				  i915_active_retire_fn fn,
-				  struct mutex *mutex)
-{
-	lockdep_assert_held(mutex);
-	active->retire = fn ?: i915_active_retire_noop;
-}
-
 /**
  * i915_active_request_raw - return the active request
  * @active - the active tracker
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 12/18] drm/i915: Extract intel_frontbuffer active tracking
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (9 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 13/18] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
                   ` (11 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Move the active tracking for the frontbuffer operations out of the
i915_gem_object and into its own first class (refcounted) object. In the
process of detangling, we switch from low level request tracking to the
easier i915_active -- with the plan that this avoids any potential
atomic callbacks as the frontbuffer tracking wishes to sleep as it
flushes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 Documentation/gpu/i915.rst                    |   3 -
 drivers/gpu/drm/i915/display/intel_display.c  |  70 +++--
 .../drm/i915/display/intel_display_types.h    |   1 +
 drivers/gpu/drm/i915/display/intel_fbdev.c    |  40 ++-
 .../gpu/drm/i915/display/intel_frontbuffer.c  | 255 +++++++++++++-----
 .../gpu/drm/i915/display/intel_frontbuffer.h  |  70 +++--
 drivers/gpu/drm/i915/display/intel_overlay.c  |   8 +-
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   2 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  14 +-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   4 -
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  27 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   2 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   8 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |   5 -
 drivers/gpu/drm/i915/i915_drv.h               |   4 -
 drivers/gpu/drm/i915/i915_gem.c               |  47 +---
 drivers/gpu/drm/i915/i915_vma.c               |   6 +-
 17 files changed, 306 insertions(+), 260 deletions(-)

diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 0e322688be5c..3415255ad3dc 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -91,9 +91,6 @@ Frontbuffer Tracking
 .. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem.c
-   :functions: i915_gem_track_fb
-
 Display FIFO Underrun Reporting
 -------------------------------
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 647f49ca86ff..84e8827c9107 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3049,12 +3049,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_object *obj = NULL;
 	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
 	struct drm_framebuffer *fb = &plane_config->fb->base;
 	u32 base_aligned = round_down(plane_config->base, PAGE_SIZE);
 	u32 size_aligned = round_up(plane_config->base + plane_config->size,
 				    PAGE_SIZE);
+	struct drm_i915_gem_object *obj;
+	bool ret = false;
 
 	size_aligned -= base_aligned;
 
@@ -3096,7 +3097,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 		break;
 	default:
 		MISSING_CASE(plane_config->tiling);
-		return false;
+		goto out;
 	}
 
 	mode_cmd.pixel_format = fb->format->format;
@@ -3108,16 +3109,15 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 
 	if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) {
 		DRM_DEBUG_KMS("intel fb init failed\n");
-		goto out_unref_obj;
+		goto out;
 	}
 
 
 	DRM_DEBUG_KMS("initial plane fb obj %p\n", obj);
-	return true;
-
-out_unref_obj:
+	ret = true;
+out:
 	i915_gem_object_put(obj);
-	return false;
+	return ret;
 }
 
 static void
@@ -3174,6 +3174,12 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 	intel_disable_plane(plane, crtc_state);
 }
 
+static struct intel_frontbuffer *
+to_intel_frontbuffer(struct drm_framebuffer *fb)
+{
+	return fb ? to_intel_framebuffer(fb)->frontbuffer : NULL;
+}
+
 static void
 intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			     struct intel_initial_plane_config *plane_config)
@@ -3181,7 +3187,6 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *c;
-	struct drm_i915_gem_object *obj;
 	struct drm_plane *primary = intel_crtc->base.primary;
 	struct drm_plane_state *plane_state = primary->state;
 	struct intel_plane *intel_plane = to_intel_plane(primary);
@@ -3257,8 +3262,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 		return;
 	}
 
-	obj = intel_fb_obj(fb);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB);
 
 	plane_state->src_x = 0;
 	plane_state->src_y = 0;
@@ -3273,14 +3277,14 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	intel_state->base.src = drm_plane_state_src(plane_state);
 	intel_state->base.dst = drm_plane_state_dest(plane_state);
 
-	if (i915_gem_object_is_tiled(obj))
+	if (plane_config->tiling)
 		dev_priv->preserve_bios_swizzle = true;
 
 	plane_state->fb = fb;
 	plane_state->crtc = &intel_crtc->base;
 
 	atomic_or(to_intel_plane(primary)->frontbuffer_bit,
-		  &obj->frontbuffer_bits);
+		  &to_intel_frontbuffer(fb)->bits);
 }
 
 static int skl_max_plane_width(const struct drm_framebuffer *fb,
@@ -14132,9 +14136,9 @@ static void intel_atomic_track_fbs(struct intel_atomic_state *state)
 
 	for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
 					     new_plane_state, i)
-		i915_gem_track_fb(intel_fb_obj(old_plane_state->base.fb),
-				  intel_fb_obj(new_plane_state->base.fb),
-				  plane->frontbuffer_bit);
+		intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->base.fb),
+					to_intel_frontbuffer(new_plane_state->base.fb),
+					plane->frontbuffer_bit);
 }
 
 static int intel_atomic_commit(struct drm_device *dev,
@@ -14418,7 +14422,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		return ret;
 
 	fb_obj_bump_render_priority(obj);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB);
 
 	if (!new_state->fence) { /* implicit fencing */
 		struct dma_fence *fence;
@@ -14681,13 +14685,12 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 			   struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
-	int ret;
 	struct drm_plane_state *old_plane_state, *new_plane_state;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *old_fb;
 	struct intel_crtc_state *crtc_state =
 		to_intel_crtc_state(crtc->state);
 	struct intel_crtc_state *new_crtc_state;
+	int ret;
 
 	/*
 	 * When crtc is inactive or there is a modeset pending,
@@ -14755,11 +14758,10 @@ intel_legacy_cursor_update(struct drm_plane *plane,
 	if (ret)
 		goto out_unlock;
 
-	intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_FLIP);
-
-	old_fb = old_plane_state->fb;
-	i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb),
-			  intel_plane->frontbuffer_bit);
+	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP);
+	intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb),
+				to_intel_frontbuffer(fb),
+				intel_plane->frontbuffer_bit);
 
 	/* Swap plane state */
 	plane->state = new_plane_state;
@@ -15540,15 +15542,9 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	drm_framebuffer_cleanup(fb);
-
-	i915_gem_object_lock(obj);
-	WARN_ON(!obj->framebuffer_references--);
-	i915_gem_object_unlock(obj);
-
-	i915_gem_object_put(obj);
+	intel_frontbuffer_put(intel_fb->frontbuffer);
 
 	kfree(intel_fb);
 }
@@ -15576,7 +15572,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 
 	i915_gem_object_flush_if_display(obj);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB);
 
 	return 0;
 }
@@ -15598,8 +15594,11 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	int ret = -EINVAL;
 	int i;
 
+	intel_fb->frontbuffer = intel_frontbuffer_get(obj);
+	if (!intel_fb->frontbuffer)
+		return -ENOMEM;
+
 	i915_gem_object_lock(obj);
-	obj->framebuffer_references++;
 	tiling = i915_gem_object_get_tiling(obj);
 	stride = i915_gem_object_get_stride(obj);
 	i915_gem_object_unlock(obj);
@@ -15716,9 +15715,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	return 0;
 
 err:
-	i915_gem_object_lock(obj);
-	obj->framebuffer_references--;
-	i915_gem_object_unlock(obj);
+	intel_frontbuffer_put(intel_fb->frontbuffer);
 	return ret;
 }
 
@@ -15736,8 +15733,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOENT);
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	if (IS_ERR(fb))
-		i915_gem_object_put(obj);
+	i915_gem_object_put(obj);
 
 	return fb;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index a88ec9aa9ca0..3c1a5f3e1d22 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -84,6 +84,7 @@ enum intel_broadcast_rgb {
 
 struct intel_framebuffer {
 	struct drm_framebuffer base;
+	struct intel_frontbuffer *frontbuffer;
 	struct intel_rotation_info rot_info;
 
 	/* for each plane in the normal GTT view */
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index eccfc30f8e32..5e7cc5a6ac7f 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -47,13 +47,14 @@
 #include "intel_fbdev.h"
 #include "intel_frontbuffer.h"
 
-static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev)
+static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev)
 {
-	struct drm_i915_gem_object *obj = intel_fb_obj(&ifbdev->fb->base);
-	unsigned int origin =
-		ifbdev->vma_flags & PLANE_HAS_FENCE ? ORIGIN_GTT : ORIGIN_CPU;
+	return ifbdev->fb->frontbuffer;
+}
 
-	intel_fb_obj_invalidate(obj, origin);
+static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev)
+{
+	intel_frontbuffer_invalidate(to_frontbuffer(ifbdev), ORIGIN_CPU);
 }
 
 static int intel_fbdev_set_par(struct fb_info *info)
@@ -120,7 +121,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_mode_fb_cmd2 mode_cmd = {};
 	struct drm_i915_gem_object *obj;
-	int size, ret;
+	int size;
 
 	/* we don't do packed 24bpp */
 	if (sizes->surface_bpp == 24)
@@ -147,24 +148,16 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
-		ret = PTR_ERR(obj);
-		goto err;
+		return PTR_ERR(obj);
 	}
 
 	fb = intel_framebuffer_create(obj, &mode_cmd);
-	if (IS_ERR(fb)) {
-		ret = PTR_ERR(fb);
-		goto err_obj;
-	}
+	i915_gem_object_put(obj);
+	if (IS_ERR(fb))
+		return PTR_ERR(fb);
 
 	ifbdev->fb = to_intel_framebuffer(fb);
-
 	return 0;
-
-err_obj:
-	i915_gem_object_put(obj);
-err:
-	return ret;
 }
 
 static int intelfb_create(struct drm_fb_helper *helper,
@@ -180,7 +173,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	const struct i915_ggtt_view view = {
 		.type = I915_GGTT_VIEW_NORMAL,
 	};
-	struct drm_framebuffer *fb;
 	intel_wakeref_t wakeref;
 	struct fb_info *info;
 	struct i915_vma *vma;
@@ -226,8 +218,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		goto out_unlock;
 	}
 
-	fb = &ifbdev->fb->base;
-	intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(to_frontbuffer(ifbdev), ORIGIN_DIRTYFB);
 
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
@@ -236,7 +227,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		goto out_unpin;
 	}
 
-	ifbdev->helper.fb = fb;
+	ifbdev->helper.fb = &ifbdev->fb->base;
 
 	info->fbops = &intelfb_ops;
 
@@ -262,13 +253,14 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	 * If the object is stolen however, it will be full of whatever
 	 * garbage was left in there.
 	 */
-	if (intel_fb_obj(fb)->stolen && !prealloc)
+	if (vma->obj->stolen && !prealloc)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
 	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n",
-		      fb->width, fb->height, i915_ggtt_offset(vma));
+		      ifbdev->fb->base.width, ifbdev->fb->base.height,
+		      i915_ggtt_offset(vma));
 	ifbdev->vma = vma;
 	ifbdev->vma_flags = flags;
 
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 9cda88e41d29..719379774fa5 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -30,11 +30,11 @@
  * Many features require us to track changes to the currently active
  * frontbuffer, especially rendering targeted at the frontbuffer.
  *
- * To be able to do so GEM tracks frontbuffers using a bitmask for all possible
- * frontbuffer slots through i915_gem_track_fb(). The function in this file are
- * then called when the contents of the frontbuffer are invalidated, when
- * frontbuffer rendering has stopped again to flush out all the changes and when
- * the frontbuffer is exchanged with a flip. Subsystems interested in
+ * To be able to do so we track frontbuffers using a bitmask for all possible
+ * frontbuffer slots through intel_frontbuffer_track(). The functions in this
+ * file are then called when the contents of the frontbuffer are invalidated,
+ * when frontbuffer rendering has stopped again to flush out all the changes
+ * and when the frontbuffer is exchanged with a flip. Subsystems interested in
  * frontbuffer changes (e.g. PSR, FBC, DRRS) should directly put their callbacks
  * into the relevant places and filter for the frontbuffer slots that they are
  * interested int.
@@ -63,28 +63,9 @@
 #include "intel_frontbuffer.h"
 #include "intel_psr.h"
 
-void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			       enum fb_op_origin origin,
-			       unsigned int frontbuffer_bits)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (origin == ORIGIN_CS) {
-		spin_lock(&dev_priv->fb_tracking.lock);
-		dev_priv->fb_tracking.busy_bits |= frontbuffer_bits;
-		dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
-		spin_unlock(&dev_priv->fb_tracking.lock);
-	}
-
-	might_sleep();
-	intel_psr_invalidate(dev_priv, frontbuffer_bits, origin);
-	intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
-	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
-}
-
 /**
- * intel_frontbuffer_flush - flush frontbuffer
- * @dev_priv: i915 device
+ * frontbuffer_flush - flush frontbuffer
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  * @origin: which operation caused the flush
  *
@@ -94,45 +75,27 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
  *
  * Can be called without any locks held.
  */
-static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv,
-				    unsigned frontbuffer_bits,
-				    enum fb_op_origin origin)
+static void frontbuffer_flush(struct drm_i915_private *i915,
+			      unsigned int frontbuffer_bits,
+			      enum fb_op_origin origin)
 {
 	/* Delay flushing when rings are still busy.*/
-	spin_lock(&dev_priv->fb_tracking.lock);
-	frontbuffer_bits &= ~dev_priv->fb_tracking.busy_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	spin_lock(&i915->fb_tracking.lock);
+	frontbuffer_bits &= ~i915->fb_tracking.busy_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 
 	if (!frontbuffer_bits)
 		return;
 
 	might_sleep();
-	intel_edp_drrs_flush(dev_priv, frontbuffer_bits);
-	intel_psr_flush(dev_priv, frontbuffer_bits, origin);
-	intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
-}
-
-void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  enum fb_op_origin origin,
-			  unsigned int frontbuffer_bits)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (origin == ORIGIN_CS) {
-		spin_lock(&dev_priv->fb_tracking.lock);
-		/* Filter out new bits since rendering started. */
-		frontbuffer_bits &= dev_priv->fb_tracking.busy_bits;
-		dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-		spin_unlock(&dev_priv->fb_tracking.lock);
-	}
-
-	if (frontbuffer_bits)
-		intel_frontbuffer_flush(dev_priv, frontbuffer_bits, origin);
+	intel_edp_drrs_flush(i915, frontbuffer_bits);
+	intel_psr_flush(i915, frontbuffer_bits, origin);
+	intel_fbc_flush(i915, frontbuffer_bits, origin);
 }
 
 /**
  * intel_frontbuffer_flip_prepare - prepare asynchronous frontbuffer flip
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called after scheduling a flip on @obj. The actual
@@ -142,19 +105,19 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915,
 				    unsigned frontbuffer_bits)
 {
-	spin_lock(&dev_priv->fb_tracking.lock);
-	dev_priv->fb_tracking.flip_bits |= frontbuffer_bits;
+	spin_lock(&i915->fb_tracking.lock);
+	i915->fb_tracking.flip_bits |= frontbuffer_bits;
 	/* Remove stale busy bits due to the old buffer. */
-	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	i915->fb_tracking.busy_bits &= ~frontbuffer_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 }
 
 /**
  * intel_frontbuffer_flip_complete - complete asynchronous frontbuffer flip
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called after the flip has been latched and will complete
@@ -162,23 +125,22 @@ void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip_complete(struct drm_i915_private *i915,
 				     unsigned frontbuffer_bits)
 {
-	spin_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&i915->fb_tracking.lock);
 	/* Mask any cancelled flips. */
-	frontbuffer_bits &= dev_priv->fb_tracking.flip_bits;
-	dev_priv->fb_tracking.flip_bits &= ~frontbuffer_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	frontbuffer_bits &= i915->fb_tracking.flip_bits;
+	i915->fb_tracking.flip_bits &= ~frontbuffer_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 
 	if (frontbuffer_bits)
-		intel_frontbuffer_flush(dev_priv,
-					frontbuffer_bits, ORIGIN_FLIP);
+		frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP);
 }
 
 /**
  * intel_frontbuffer_flip - synchronous frontbuffer flip
- * @dev_priv: i915 device
+ * @i915: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
  *
  * This function gets called after scheduling a flip on @obj. This is for
@@ -187,13 +149,160 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
  *
  * Can be called without any locks held.
  */
-void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip(struct drm_i915_private *i915,
 			    unsigned frontbuffer_bits)
 {
-	spin_lock(&dev_priv->fb_tracking.lock);
+	spin_lock(&i915->fb_tracking.lock);
 	/* Remove stale busy bits due to the old buffer. */
-	dev_priv->fb_tracking.busy_bits &= ~frontbuffer_bits;
-	spin_unlock(&dev_priv->fb_tracking.lock);
+	i915->fb_tracking.busy_bits &= ~frontbuffer_bits;
+	spin_unlock(&i915->fb_tracking.lock);
 
-	intel_frontbuffer_flush(dev_priv, frontbuffer_bits, ORIGIN_FLIP);
+	frontbuffer_flush(i915, frontbuffer_bits, ORIGIN_FLIP);
+}
+
+void __intel_fb_invalidate(struct intel_frontbuffer *front,
+			   enum fb_op_origin origin,
+			   unsigned int frontbuffer_bits)
+{
+	struct drm_i915_private *i915 = to_i915(front->obj->base.dev);
+
+	if (origin == ORIGIN_CS) {
+		spin_lock(&i915->fb_tracking.lock);
+		i915->fb_tracking.busy_bits |= frontbuffer_bits;
+		i915->fb_tracking.flip_bits &= ~frontbuffer_bits;
+		spin_unlock(&i915->fb_tracking.lock);
+	}
+
+	might_sleep();
+	intel_psr_invalidate(i915, frontbuffer_bits, origin);
+	intel_edp_drrs_invalidate(i915, frontbuffer_bits);
+	intel_fbc_invalidate(i915, frontbuffer_bits, origin);
+}
+
+void __intel_fb_flush(struct intel_frontbuffer *front,
+		      enum fb_op_origin origin,
+		      unsigned int frontbuffer_bits)
+{
+	struct drm_i915_private *i915 = to_i915(front->obj->base.dev);
+
+	if (origin == ORIGIN_CS) {
+		spin_lock(&i915->fb_tracking.lock);
+		/* Filter out new bits since rendering started. */
+		frontbuffer_bits &= i915->fb_tracking.busy_bits;
+		i915->fb_tracking.busy_bits &= ~frontbuffer_bits;
+		spin_unlock(&i915->fb_tracking.lock);
+	}
+
+	if (frontbuffer_bits)
+		frontbuffer_flush(i915, frontbuffer_bits, origin);
+}
+
+static int frontbuffer_active(struct i915_active *ref)
+{
+	struct intel_frontbuffer *front =
+		container_of(ref, typeof(*front), write);
+
+	kref_get(&front->ref);
+	return 0;
+}
+
+static void frontbuffer_retire(struct i915_active *ref)
+{
+	struct intel_frontbuffer *front =
+		container_of(ref, typeof(*front), write);
+
+	intel_frontbuffer_flush(front, ORIGIN_CS);
+	intel_frontbuffer_put(front);
+}
+
+static void frontbuffer_release(struct kref *ref)
+	__releases(&to_i915(front->obj->base.dev)->fb_tracking.lock)
+{
+	struct intel_frontbuffer *front =
+		container_of(ref, typeof(*front), ref);
+
+	front->obj->frontbuffer = NULL;
+	spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock);
+
+	i915_gem_object_put(front->obj);
+	kfree(front);
+}
+
+struct intel_frontbuffer *
+intel_frontbuffer_get(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_frontbuffer *front;
+
+	spin_lock(&i915->fb_tracking.lock);
+	front = obj->frontbuffer;
+	if (front)
+		kref_get(&front->ref);
+	spin_unlock(&i915->fb_tracking.lock);
+	if (front)
+		return front;
+
+	front = kmalloc(sizeof(*front), GFP_KERNEL);
+	if (!front)
+		return NULL;
+
+	front->obj = obj;
+	kref_init(&front->ref);
+	atomic_set(&front->bits, 0);
+	i915_active_init(i915, &front->write,
+			 frontbuffer_active, frontbuffer_retire);
+
+	spin_lock(&i915->fb_tracking.lock);
+	if (obj->frontbuffer) {
+		kfree(front);
+		front = obj->frontbuffer;
+		kref_get(&front->ref);
+	} else {
+		i915_gem_object_get(obj);
+		obj->frontbuffer = front;
+	}
+	spin_unlock(&i915->fb_tracking.lock);
+
+	return front;
+}
+
+void intel_frontbuffer_put(struct intel_frontbuffer *front)
+{
+	kref_put_lock(&front->ref,
+		      frontbuffer_release,
+		      &to_i915(front->obj->base.dev)->fb_tracking.lock);
+}
+
+/**
+ * intel_frontbuffer_track - update frontbuffer tracking
+ * @old: current buffer for the frontbuffer slots
+ * @new: new buffer for the frontbuffer slots
+ * @frontbuffer_bits: bitmask of frontbuffer slots
+ *
+ * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
+ * from @old and setting them in @new. Both @old and @new can be NULL.
+ */
+void intel_frontbuffer_track(struct intel_frontbuffer *old,
+			     struct intel_frontbuffer *new,
+			     unsigned int frontbuffer_bits)
+{
+	/*
+	 * Control of individual bits within the mask are guarded by
+	 * the owning plane->mutex, i.e. we can never see concurrent
+	 * manipulation of individual bits. But since the bitfield as a whole
+	 * is updated using RMW, we need to use atomics in order to update
+	 * the bits.
+	 */
+	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
+		     BITS_PER_TYPE(atomic_t));
+
+	if (old) {
+		WARN_ON(!(atomic_read(&old->bits) & frontbuffer_bits));
+		atomic_andnot(frontbuffer_bits, &old->bits);
+	}
+
+	if (new) {
+		WARN_ON(atomic_read(&new->bits) & frontbuffer_bits);
+		atomic_or(frontbuffer_bits, &new->bits);
+	}
 }
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
index 5727320c8084..adc64d61a4a5 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
@@ -24,7 +24,10 @@
 #ifndef __INTEL_FRONTBUFFER_H__
 #define __INTEL_FRONTBUFFER_H__
 
-#include "gem/i915_gem_object.h"
+#include <linux/atomic.h>
+#include <linux/kref.h>
+
+#include "i915_active.h"
 
 struct drm_i915_private;
 struct drm_i915_gem_object;
@@ -37,23 +40,30 @@ enum fb_op_origin {
 	ORIGIN_DIRTYFB,
 };
 
-void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
+struct intel_frontbuffer {
+	struct kref ref;
+	atomic_t bits;
+	struct i915_active write;
+	struct drm_i915_gem_object *obj;
+};
+
+void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915,
 				    unsigned frontbuffer_bits);
-void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip_complete(struct drm_i915_private *i915,
 				     unsigned frontbuffer_bits);
-void intel_frontbuffer_flip(struct drm_i915_private *dev_priv,
+void intel_frontbuffer_flip(struct drm_i915_private *i915,
 			    unsigned frontbuffer_bits);
 
-void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			       enum fb_op_origin origin,
-			       unsigned int frontbuffer_bits);
-void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-			  enum fb_op_origin origin,
-			  unsigned int frontbuffer_bits);
+struct intel_frontbuffer *
+intel_frontbuffer_get(struct drm_i915_gem_object *obj);
+
+void __intel_fb_invalidate(struct intel_frontbuffer *front,
+			   enum fb_op_origin origin,
+			   unsigned int frontbuffer_bits);
 
 /**
- * intel_fb_obj_invalidate - invalidate frontbuffer object
- * @obj: GEM object to invalidate
+ * intel_frontbuffer_invalidate - invalidate frontbuffer object
+ * @front: GEM object to invalidate
  * @origin: which operation caused the invalidation
  *
  * This function gets called every time rendering on the given object starts and
@@ -62,37 +72,53 @@ void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
  * until the rendering completes or a flip on this frontbuffer plane is
  * scheduled.
  */
-static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-					   enum fb_op_origin origin)
+static inline bool intel_frontbuffer_invalidate(struct intel_frontbuffer *front,
+						enum fb_op_origin origin)
 {
 	unsigned int frontbuffer_bits;
 
-	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (!front)
+		return false;
+
+	frontbuffer_bits = atomic_read(&front->bits);
 	if (!frontbuffer_bits)
 		return false;
 
-	__intel_fb_obj_invalidate(obj, origin, frontbuffer_bits);
+	__intel_fb_invalidate(front, origin, frontbuffer_bits);
 	return true;
 }
 
+void __intel_fb_flush(struct intel_frontbuffer *front,
+		      enum fb_op_origin origin,
+		      unsigned int frontbuffer_bits);
+
 /**
- * intel_fb_obj_flush - flush frontbuffer object
- * @obj: GEM object to flush
+ * intel_frontbuffer_flush - flush frontbuffer object
+ * @front: GEM object to flush
  * @origin: which operation caused the flush
  *
  * This function gets called every time rendering on the given object has
  * completed and frontbuffer caching can be started again.
  */
-static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
-				      enum fb_op_origin origin)
+static inline void intel_frontbuffer_flush(struct intel_frontbuffer *front,
+					   enum fb_op_origin origin)
 {
 	unsigned int frontbuffer_bits;
 
-	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+	if (!front)
+		return;
+
+	frontbuffer_bits = atomic_read(&front->bits);
 	if (!frontbuffer_bits)
 		return;
 
-	__intel_fb_obj_flush(obj, origin, frontbuffer_bits);
+	__intel_fb_flush(front, origin, frontbuffer_bits);
 }
 
+void intel_frontbuffer_track(struct intel_frontbuffer *old,
+			     struct intel_frontbuffer *new,
+			     unsigned int frontbuffer_bits);
+
+void intel_frontbuffer_put(struct intel_frontbuffer *front);
+
 #endif /* __INTEL_FRONTBUFFER_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 4f78586ee05e..e1248eace0e1 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -281,9 +281,9 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
 
 	WARN_ON(overlay->old_vma);
 
-	i915_gem_track_fb(overlay->vma ? overlay->vma->obj : NULL,
-			  vma ? vma->obj : NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(pipe));
+	intel_frontbuffer_track(overlay->vma ? overlay->vma->obj->frontbuffer : NULL,
+				vma ? vma->obj->frontbuffer : NULL,
+				INTEL_FRONTBUFFER_OVERLAY(pipe));
 
 	intel_frontbuffer_flip_prepare(overlay->i915,
 				       INTEL_FRONTBUFFER_OVERLAY(pipe));
@@ -768,7 +768,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
 	}
-	intel_fb_obj_flush(new_bo, ORIGIN_DIRTYFB);
+	intel_frontbuffer_flush(new_bo->frontbuffer, ORIGIN_DIRTYFB);
 
 	ret = i915_vma_put_fence(vma);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index c31684682eaa..77944950d4c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -49,7 +49,7 @@ static void __i915_do_clflush(struct drm_i915_gem_object *obj)
 {
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
 	drm_clflush_sg(obj->mm.pages);
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 }
 
 static void i915_clflush_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 2e3ce2a69653..a1afc2690e9e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -551,13 +551,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -661,9 +654,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 
 	i915_gem_object_unlock(obj);
 
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					fb_write_origin(obj, write_domain));
+	if (write_domain)
+		intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
 
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -783,7 +775,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	}
 
 out:
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
 	obj->mm.dirty = true;
 	/* return with the pages pinned */
 	return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 1e7311493530..48c2cbe9b278 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -101,9 +101,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		up_write(&mm->mmap_sem);
 		if (IS_ERR_VALUE(addr))
 			goto err;
-
-		/* This may race, but that's ok, it only gets set */
-		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
 	}
 	i915_gem_object_put(obj);
 
@@ -283,7 +280,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		 * Userspace is now writing through an untracked VMA, abandon
 		 * all hope that the hardware is able to track future writes.
 		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
 
 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
 		if (IS_ERR(vma) && !view.type) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 3929c3a6b281..0807bb5464cf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -46,16 +46,6 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 	return kmem_cache_free(global.slab_objects, obj);
 }
 
-static void
-frontbuffer_retire(struct i915_active_request *active,
-		   struct i915_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, typeof(*obj), frontbuffer_write);
-
-	intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
@@ -72,10 +62,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->ops = ops;
 
-	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	i915_active_request_init(&obj->frontbuffer_write,
-				 NULL, frontbuffer_retire);
-
 	obj->mm.madv = I915_MADV_WILLNEED;
 	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
 	mutex_init(&obj->mm.get_page.lock);
@@ -187,7 +173,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
 		GEM_BUG_ON(atomic_read(&obj->bind_count));
 		GEM_BUG_ON(obj->userfault_count);
-		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
 		atomic_set(&obj->mm.pages_pin_count, 0);
@@ -230,6 +215,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
+	GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
+
 	/*
 	 * Before we free the object, make sure any pure RCU-only
 	 * read-side critical sections are complete, e.g.
@@ -261,13 +248,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		queue_work(i915->wq, &i915->mm.free_work);
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	return !(obj->cache_level == I915_CACHE_NONE ||
@@ -290,8 +270,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 		for_each_ggtt_vma(vma, obj)
 			intel_gt_flush_ggtt_writes(vma->vm->gt);
 
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+		intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 
 		for_each_ggtt_vma(vma, obj) {
 			if (vma->iomap)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3714cf234d64..abc23e7e13a7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -161,7 +161,7 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
-	return READ_ONCE(obj->framebuffer_references);
+	return READ_ONCE(obj->frontbuffer);
 }
 
 static inline unsigned int
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index d474c6ac4100..ede0eb4218a8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -13,6 +13,7 @@
 #include "i915_selftest.h"
 
 struct drm_i915_gem_object;
+struct intel_fronbuffer;
 
 /*
  * struct i915_lut_handle tracks the fast lookups from handle to vma used
@@ -141,9 +142,7 @@ struct drm_i915_gem_object {
 	 */
 	u16 write_domain;
 
-	atomic_t frontbuffer_bits;
-	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_active_request frontbuffer_write;
+	struct intel_frontbuffer *frontbuffer;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	unsigned int tiling_and_stride;
@@ -224,9 +223,6 @@ struct drm_i915_gem_object {
 		bool quirked:1;
 	} mm;
 
-	/** References from framebuffers, locks out tiling changes. */
-	unsigned int framebuffer_references;
-
 	/** Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2c640987c24d..ac66bae6e2f2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -138,7 +138,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct intel_engine_cs *engine;
 	struct i915_vma *vma;
-	unsigned int frontbuffer_bits;
 	int pin_count = 0;
 
 	seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s",
@@ -228,10 +227,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
 		seq_printf(m, " (%s)", engine->name);
-
-	frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
-	if (frontbuffer_bits)
-		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
 struct file_stats {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 18be8b250a7c..525e6366d46c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2356,10 +2356,6 @@ int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      u32 handle, u64 *offset);
 int i915_gem_mmap_gtt_version(void);
 
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-		       struct drm_i915_gem_object *new,
-		       unsigned frontbuffer_bits);
-
 int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 29be25a7aade..71ee4c710252 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -139,17 +139,19 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
 
-	/* We manually control the domain here and pretend that it
+	/*
+	 * We manually control the domain here and pretend that it
 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 	 */
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+
 	if (copy_from_user(vaddr, user_data, args->size))
 		return -EFAULT;
 
 	drm_clflush_virt_range(vaddr, args->size);
 	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
 
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 	return 0;
 }
 
@@ -594,7 +596,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		goto out_unpin;
 	}
 
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
@@ -636,7 +638,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		user_data += page_length;
 		offset += page_length;
 	}
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 
 	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
@@ -729,7 +731,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	intel_fb_obj_flush(obj, ORIGIN_CPU);
+	intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
 	i915_gem_object_unlock_fence(obj, fence);
 
 	return ret;
@@ -1763,39 +1765,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	return ret;
 }
 
-/**
- * i915_gem_track_fb - update frontbuffer tracking
- * @old: current GEM buffer for the frontbuffer slots
- * @new: new GEM buffer for the frontbuffer slots
- * @frontbuffer_bits: bitmask of frontbuffer slots
- *
- * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
- * from @old and setting them in @new. Both @old and @new can be NULL.
- */
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-		       struct drm_i915_gem_object *new,
-		       unsigned frontbuffer_bits)
-{
-	/* Control of individual bits within the mask are guarded by
-	 * the owning plane->mutex, i.e. we can never see concurrent
-	 * manipulation of individual bits. But since the bitfield as a whole
-	 * is updated using RMW, we need to use atomics in order to update
-	 * the bits.
-	 */
-	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
-		     BITS_PER_TYPE(atomic_t));
-
-	if (old) {
-		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
-		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
-	}
-
-	if (new) {
-		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
-		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
-	}
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_gem_device.c"
 #include "selftests/i915_gem.c"
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8be1bbef40e5..d38ef2ef3ce4 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -908,8 +908,10 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 		return err;
 
 	if (flags & EXEC_OBJECT_WRITE) {
-		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
-			__i915_active_request_set(&obj->frontbuffer_write, rq);
+		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
+			i915_active_ref(&obj->frontbuffer->write,
+					rq->fence.context,
+					rq);
 
 		reservation_object_add_excl_fence(vma->resv, &rq->fence);
 		obj->write_domain = I915_GEM_DOMAIN_RENDER;
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 13/18] drm/i915: Markup expected timeline locks for i915_active
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (10 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 12/18] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 14/18] drm/i915: Remove logical HW ID Chris Wilson
                   ` (10 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

As every i915_active_request should be serialised by a dedicated lock,
i915_active consists of a tree of locks; one for each node. Markup up
the i915_active_request with what lock is supposed to be guarding it so
that we can verify that the serialised updated are indeed serialised.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gt/intel_context.c       | 11 +++--------
 drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  2 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  7 +++----
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  4 ++++
 .../gpu/drm/i915/gt/selftests/mock_timeline.c |  2 +-
 drivers/gpu/drm/i915/i915_active.c            | 19 +++++++++++++++----
 drivers/gpu/drm/i915/i915_active.h            | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_active_types.h      |  3 +++
 drivers/gpu/drm/i915/i915_vma.c               |  4 ++--
 drivers/gpu/drm/i915/selftests/i915_active.c  |  3 +--
 13 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index e1248eace0e1..eca41c4a5aa6 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 	if (IS_ERR(rq))
 		return rq;
 
-	err = i915_active_ref(&overlay->last_flip, rq->fence.context, rq);
+	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
 	if (err) {
 		i915_request_add(rq);
 		return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index ac14677dd537..2536d1f54629 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_ref(&vma->active, rq->fence.context, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, rq);
 	if (err)
 		goto out_request;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a6b0cb714292..cd1fd2e5423a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -908,7 +908,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (emit)
 			err = emit(rq, data);
 		if (err == 0)
-			err = i915_active_ref(&cb->base, rq->fence.context, rq);
+			err = i915_active_ref(&cb->base, rq->timeline, rq);
 
 		i915_request_add(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 9114953bf920..f55691d151ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -306,10 +306,10 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 
 		/* Queue this switch after current activity by this context. */
 		err = i915_active_request_set(&tl->last_request, rq);
+		mutex_unlock(&tl->mutex);
 		if (err)
-			goto unlock;
+			return err;
 	}
-	lockdep_assert_held(&tl->mutex);
 
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
@@ -319,12 +319,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	 * words transfer the pinned ce object to tracked active request.
 	 */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	err = i915_active_ref(&ce->active, rq->fence.context, rq);
-
-unlock:
-	if (rq->timeline != tl)
-		mutex_unlock(&tl->mutex);
-	return err;
+	return i915_active_ref(&ce->active, rq->timeline, rq);
 }
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index f7a0a660c1c9..8d069efd9457 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -18,7 +18,7 @@ static inline int
 intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
 			      struct i915_request *rq)
 {
-	return i915_active_ref(&node->active, rq->fence.context, rq);
+	return i915_active_ref(&node->active, rq->timeline, rq);
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index eafd94d5e211..02fbe11b671b 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request);
+	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -440,8 +440,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active,
-			      tl->fence_context, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
 	if (err)
 		goto err_cacheline;
 
@@ -492,7 +491,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
 static int cacheline_ref(struct intel_timeline_cacheline *cl,
 			 struct i915_request *rq)
 {
-	return i915_active_ref(&cl->active, rq->fence.context, rq);
+	return i915_active_ref(&cl->active, rq->timeline, rq);
 }
 
 int intel_timeline_read_hwsp(struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index d54113697745..321481403165 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -689,7 +689,9 @@ static int live_hwsp_wrap(void *arg)
 
 		tl->seqno = -4u;
 
+		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
+		mutex_unlock(&tl->mutex);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
@@ -704,7 +706,9 @@ static int live_hwsp_wrap(void *arg)
 		}
 		hwsp_seqno[0] = tl->hwsp_seqno;
 
+		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
+		mutex_unlock(&tl->mutex);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 5c549205828a..598170efcaf6 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request);
+	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7698fcaa648a..8ec53f2dc1dc 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -163,10 +163,11 @@ node_retire(struct i915_active_request *base, struct i915_request *rq)
 }
 
 static struct i915_active_request *
-active_instance(struct i915_active *ref, u64 idx)
+active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
 	struct rb_node **p, *parent;
+	u64 idx = tl->fence_context;
 
 	/*
 	 * We track the most recently used timeline to skip a rbtree search
@@ -205,7 +206,7 @@ active_instance(struct i915_active *ref, u64 idx)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, NULL, node_retire);
+	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -281,18 +282,20 @@ static bool __active_del_barrier(struct i915_active *ref,
 }
 
 int i915_active_ref(struct i915_active *ref,
-		    u64 timeline,
+		    struct intel_timeline *tl,
 		    struct i915_request *rq)
 {
 	struct i915_active_request *active;
 	int err;
 
+	lockdep_assert_held(&tl->mutex);
+
 	/* Prevent reaping in case we malloc/wait while building the tree */
 	err = i915_active_acquire(ref);
 	if (err)
 		return err;
 
-	active = active_instance(ref, timeline);
+	active = active_instance(ref, tl);
 	if (!active) {
 		err = -ENOMEM;
 		goto out;
@@ -579,6 +582,10 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 				goto unwind;
 			}
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+			node->base.lock =
+				&engine->kernel_context->timeline->mutex;
+#endif
 			RCU_INIT_POINTER(node->base.request, NULL);
 			node->base.retire = node_retire;
 			node->timeline = idx;
@@ -683,6 +690,10 @@ int i915_active_request_set(struct i915_active_request *active,
 {
 	int err;
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	lockdep_assert_held(active->lock);
+#endif
+
 	/* Must maintain ordering wrt previous active requests */
 	err = i915_request_await_active_request(rq, active);
 	if (err)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index f6d730cf2fe6..f95058f99057 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -58,15 +58,20 @@ void i915_active_retire_noop(struct i915_active_request *active,
  */
 static inline void
 i915_active_request_init(struct i915_active_request *active,
+			 struct mutex *lock,
 			 struct i915_request *rq,
 			 i915_active_retire_fn retire)
 {
 	RCU_INIT_POINTER(active->request, rq);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_active_retire_noop;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	active->lock = lock;
+#endif
 }
 
-#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
+#define INIT_ACTIVE_REQUEST(name, lock) \
+	i915_active_request_init((name), (lock), NULL, NULL)
 
 /**
  * i915_active_request_set - updates the tracker to watch the current request
@@ -81,6 +86,9 @@ static inline void
 __i915_active_request_set(struct i915_active_request *active,
 			  struct i915_request *request)
 {
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	lockdep_assert_held(active->lock);
+#endif
 	list_move(&active->link, &request->active_list);
 	rcu_assign_pointer(active->request, request);
 }
@@ -362,7 +370,7 @@ void __i915_active_init(struct drm_i915_private *i915,
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
-		    u64 timeline,
+		    struct intel_timeline *tl,
 		    struct i915_request *rq);
 
 int i915_active_wait(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index ae3ee441c114..d857bd12aa7e 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -24,6 +24,9 @@ struct i915_active_request {
 	struct i915_request __rcu *request;
 	struct list_head link;
 	i915_active_retire_fn retire;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	struct mutex *lock;
+#endif
 };
 
 struct active_node;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d38ef2ef3ce4..68260b40982b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -903,14 +903,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	err = i915_active_ref(&vma->active, rq->fence.context, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, rq);
 	if (unlikely(err))
 		return err;
 
 	if (flags & EXEC_OBJECT_WRITE) {
 		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
 			i915_active_ref(&obj->frontbuffer->write,
-					rq->fence.context,
+					rq->timeline,
 					rq);
 
 		reservation_object_add_excl_fence(vma->resv, &rq->fence);
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index e5cd5d47e380..77d844ac8b71 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -110,8 +110,7 @@ __live_active_setup(struct drm_i915_private *i915)
 						       submit,
 						       GFP_KERNEL);
 		if (err >= 0)
-			err = i915_active_ref(&active->base,
-					      rq->fence.context, rq);
+			err = i915_active_ref(&active->base, rq->timeline, rq);
 		i915_request_add(rq);
 		if (err) {
 			pr_err("Failed to track active ref!\n");
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 14/18] drm/i915: Remove logical HW ID
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (11 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 13/18] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 15/18] drm/i915: Move context management under GEM Chris Wilson
                   ` (9 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

With the introduction of ctx->engines[] we allow multiple logical
contexts to be used on the same engine (e.g. with virtual engines). Each
logical context requires a unique tag in order for context-switching to
occur correctly between them.

We only need to keep a unique tag for the active lifetime of the
context, and for as long as we need to identify that context. The HW
uses the tag to determine if it should use a lite-restore (why not the
LRCA?) and passes the tag back for various status identifies. The only
status we need to track is for OA, so when using perf, we assign the
specific context a unique tag.

Note that although we it call it the HW ID, the hardware calls it the SW
ID! That's all it is a cookie we give to the HW that it passes back on
event notifications so we can identify the source context.

Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 144 ------------------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
 .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |   8 -
 drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  29 ++--
 drivers/gpu/drm/i915/gvt/kvmgt.c              |  17 ---
 drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |   7 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
 drivers/gpu/drm/i915/i915_perf.c              |  30 ++--
 drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
 16 files changed, 50 insertions(+), 284 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index cd1fd2e5423a..774a3ac853a8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -167,95 +167,6 @@ lookup_user_engine(struct i915_gem_context *ctx,
 	return i915_gem_context_get_engine(ctx, idx);
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
-{
-	unsigned int max;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	if (INTEL_GEN(i915) >= 11)
-		max = GEN11_MAX_CONTEXT_HW_ID;
-	else if (USES_GUC_SUBMISSION(i915))
-		/*
-		 * When using GuC in proxy submission, GuC consumes the
-		 * highest bit in the context id to indicate proxy submission.
-		 */
-		max = MAX_GUC_CONTEXT_HW_ID;
-	else
-		max = MAX_CONTEXT_HW_ID;
-
-	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-	struct i915_gem_context *ctx, *cn;
-	LIST_HEAD(pinned);
-	int id = -ENOSPC;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	list_for_each_entry_safe(ctx, cn,
-				 &i915->contexts.hw_id_list, hw_id_link) {
-		if (atomic_read(&ctx->hw_id_pin_count)) {
-			list_move_tail(&ctx->hw_id_link, &pinned);
-			continue;
-		}
-
-		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-		list_del_init(&ctx->hw_id_link);
-		id = ctx->hw_id;
-		break;
-	}
-
-	/*
-	 * Remember how far we got up on the last repossesion scan, so the
-	 * list is kept in a "least recently scanned" order.
-	 */
-	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-	return id;
-}
-
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
-{
-	int ret;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	/*
-	 * We prefer to steal/stall ourselves and our users over that of the
-	 * entire system. That may be a little unfair to our users, and
-	 * even hurt high priority clients. The choice is whether to oomkill
-	 * something else, or steal a context id.
-	 */
-	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	if (unlikely(ret < 0)) {
-		ret = steal_hw_id(i915);
-		if (ret < 0) /* once again for the correct errno code */
-			ret = new_hw_id(i915, GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
-
-	*out = ret;
-	return 0;
-}
-
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-
-	if (list_empty(&ctx->hw_id_link))
-		return;
-
-	mutex_lock(&i915->contexts.mutex);
-	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-		list_del_init(&ctx->hw_id_link);
-	}
-	mutex_unlock(&i915->contexts.mutex);
-}
-
 static void __free_engines(struct i915_gem_engines *e, unsigned int count)
 {
 	while (count--) {
@@ -310,7 +221,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	release_hw_id(ctx);
 	if (ctx->vm)
 		i915_vm_put(ctx->vm);
 
@@ -382,12 +292,6 @@ static void context_close(struct i915_gem_context *ctx)
 	i915_gem_context_set_closed(ctx);
 	ctx->file_priv = ERR_PTR(-EBADF);
 
-	/*
-	 * This context will never again be assinged to HW, so we can
-	 * reuse its ID for the next context.
-	 */
-	release_hw_id(ctx);
-
 	/*
 	 * The LUT uses the VMA as a backpointer to unref the object,
 	 * so we need to clear the LUT before we close all the VMA (inside
@@ -426,7 +330,6 @@ __create_context(struct drm_i915_private *i915)
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
@@ -580,18 +483,11 @@ struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;
-	int err;
 
 	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
 	i915_gem_context_clear_bannable(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 
@@ -630,15 +526,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	/*
-	 * For easy recognisablity, we want the kernel context to be 0 and then
-	 * all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
-	 */
-	GEM_BUG_ON(ctx->hw_id);
-	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
 	dev_priv->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
@@ -652,10 +539,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	destroy_kernel_context(&i915->kernel_context);
-
-	/* Must free all deferred contexts (via flush_workqueue) first */
-	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
-	ida_destroy(&i915->contexts.hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
@@ -2330,33 +2213,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int err = 0;
-
-	mutex_lock(&i915->contexts.mutex);
-
-	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
-	if (list_empty(&ctx->hw_id_link)) {
-		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
-		err = assign_hw_id(i915, &ctx->hw_id);
-		if (err)
-			goto out_unlock;
-
-		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
-	}
-
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
-	atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
-	mutex_unlock(&i915->contexts.mutex);
-	return err;
-}
-
 /* GEM context-engines iterator: for_each_gem_engine() */
 struct intel_context *
 i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 176978608b6f..50bc27d30c03 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -112,21 +112,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
 	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
-		return 0;
-
-	return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
-	atomic_dec(&ctx->hw_id_pin_count);
-}
-
 static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 {
 	return !ctx->file_priv;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..87be27877e22 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -147,24 +147,6 @@ struct i915_gem_context {
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 #define CONTEXT_USER_ENGINES		3
 
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
 	struct mutex mutex;
 
 	struct i915_sched_attr sched;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index dd87e6cd612e..e4fec34228bf 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -410,9 +410,9 @@ static int igt_ctx_exec(void *arg)
 
 			err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->vm), err);
 				goto out_unlock;
 			}
@@ -529,9 +529,9 @@ static int igt_shared_ctx_exec(void *arg)
 
 			err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->vm), err);
 				kernel_context_close(ctx);
 				goto out_test;
@@ -1098,10 +1098,9 @@ static int igt_ctx_readonly(void *arg)
 
 			err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->vm), err);
+				       engine->name, yesno(!!ctx->vm), err);
 				goto out_unlock;
 			}
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index be8974ccff24..0104f16b1327 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915,
 {
 	struct i915_gem_context *ctx;
 	struct i915_gem_engines *e;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -30,13 +29,8 @@ mock_context(struct drm_i915_private *i915,
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 	mutex_init(&ctx->mutex);
 
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_engines;
-
 	if (name) {
 		struct i915_ppgtt *ppgtt;
 
@@ -54,8 +48,6 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_engines:
-	free_engines(rcu_access_pointer(ctx->engines));
 err_free:
 	kfree(ctx);
 	return NULL;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..a3578ef24079 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+	u32 hw_tag;
 
 	unsigned int active_count; /* protected by timeline->mutex */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9965a32601d6..0046abef298d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -273,10 +273,12 @@ struct intel_engine_cs {
 	u8 uabi_class;
 	u8 uabi_instance;
 
+	u32 uabi_capabilities;
 	u32 context_size;
 	u32 mmio_base;
 
-	u32 uabi_capabilities;
+	unsigned int hw_tag;
+#define NUM_HW_TAG (256)
 
 	struct rb_node uabi_node;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 71c69381a1aa..947bf994d586 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -415,12 +415,8 @@ assert_priority_queue(const struct i915_request *prev,
 static u64
 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
 	desc = INTEL_LEGACY_32B_CONTEXT;
 	if (i915_vm_is_4lvl(ce->vm))
 		desc = INTEL_LEGACY_64B_CONTEXT;
@@ -438,20 +434,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	 * anything below.
 	 */
 	if (INTEL_GEN(engine->i915) >= 11) {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-								/* bits 37-47 */
-
 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 								/* bits 48-53 */
 
-		/* TODO: decide what to do with SW counter (bits 55-60) */
-
 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 								/* bits 61-63 */
-	} else {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
 	return desc;
@@ -555,6 +542,15 @@ __execlists_schedule_in(struct i915_request *rq)
 
 	intel_context_get(ce);
 
+	if (ce->hw_tag) {
+		ce->lrc_desc |= (u64)ce->hw_tag << 32;
+	} else {
+		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+		ce->lrc_desc |=
+			(u64)(engine->hw_tag++ % NUM_HW_TAG) <<
+			GEN11_SW_CTX_ID_SHIFT;
+	}
+
 	intel_gt_pm_get(engine->gt);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
@@ -1617,7 +1613,6 @@ static void execlists_context_destroy(struct kref *kref)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
 	intel_ring_reset(ce->ring, ce->ring->tail);
 }
@@ -1667,18 +1662,12 @@ __execlists_context_pin(struct intel_context *ce,
 		goto unpin_active;
 	}
 
-	ret = i915_gem_context_pin_hw_id(ce->gem_context);
-	if (ret)
-		goto unpin_map;
-
 	ce->lrc_desc = lrc_descriptor(ce, engine);
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
 
-unpin_map:
-	i915_gem_object_unpin_map(ce->state->obj);
 unpin_active:
 	intel_context_active_release(ce);
 err:
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 23aa3e50cbf8..895ddff7fa42 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1566,27 +1566,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "\n");
 }
 
-static ssize_t
-hw_id_show(struct device *dev, struct device_attribute *attr,
-	   char *buf)
-{
-	struct mdev_device *mdev = mdev_from_dev(dev);
-
-	if (mdev) {
-		struct intel_vgpu *vgpu = (struct intel_vgpu *)
-			mdev_get_drvdata(mdev);
-		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow[0]->gem_context->hw_id);
-	}
-	return sprintf(buf, "\n");
-}
-
 static DEVICE_ATTR_RO(vgpu_id);
-static DEVICE_ATTR_RO(hw_id);
 
 static struct attribute *intel_vgpu_attrs[] = {
 	&dev_attr_vgpu_id.attr,
-	&dev_attr_hw_id.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ac66bae6e2f2..a3393d9fa84e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1577,9 +1577,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
-		if (!list_empty(&ctx->hw_id_link))
-			seq_printf(m, "%x [pin %u]", ctx->hw_id,
-				   atomic_read(&ctx->hw_id_pin_count));
 		if (ctx->pid) {
 			struct task_struct *task;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 92986d3f6995..3b5bfe50e00a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -470,9 +470,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n",
-		   header, ctx->comm, ctx->pid, ctx->hw_id,
-		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
+	err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
+		   ctx->guilty, ctx->active);
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
@@ -1263,7 +1263,6 @@ static bool record_context(struct drm_i915_error_context *e,
 		rcu_read_unlock();
 	}
 
-	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index df9f57766626..6b3ba94f7337 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -118,7 +118,6 @@ struct i915_gpu_state {
 		struct drm_i915_error_context {
 			char comm[TASK_COMM_LEN];
 			pid_t pid;
-			u32 hw_id;
 			int active;
 			int guilty;
 			struct i915_sched_attr sched_attr;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e42b86827d6b..3d43cde3ec50 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1292,21 +1292,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		} else {
 			stream->specific_ctx_id_mask =
 				(1U << GEN8_CTX_ID_WIDTH) - 1;
-			stream->specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			stream->specific_ctx_id &=
-				stream->specific_ctx_id_mask;
+			stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		}
 		break;
 
 	case 11: {
 		stream->specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		stream->specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		stream->specific_ctx_id &=
-			stream->specific_ctx_id_mask;
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		stream->specific_ctx_id = stream->specific_ctx_id_mask;
 		break;
 	}
 
@@ -1314,6 +1307,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		MISSING_CASE(INTEL_GEN(i915));
 	}
 
+	ce->hw_tag = stream->specific_ctx_id_mask;
+
 	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
 			 stream->specific_ctx_id,
 			 stream->specific_ctx_id_mask);
@@ -1330,18 +1325,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
-	stream->specific_ctx_id = INVALID_CTX_ID;
-	stream->specific_ctx_id_mask = 0;
-
 	ce = fetch_and_zero(&stream->pinned_ctx);
-	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
+	if (ce)  {
+		mutex_lock(&i915->drm.struct_mutex);
+		ce->hw_tag = 0;
 		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+		mutex_unlock(&i915->drm.struct_mutex);
 	}
+
+	stream->specific_ctx_id = INVALID_CTX_ID;
+	stream->specific_ctx_id_mask = 0;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 24f2944da09d..1f2cf6cfafb5 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->flags)
+		      __entry->ctx, __entry->seqno, __entry->flags)
 );
 
 DECLARE_EVENT_CLASS(i915_request,
@@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno)
+		      __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in,
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->prio, __entry->port)
 );
 
@@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out,
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
-			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->completed)
+			      __entry->ctx, __entry->seqno, __entry->completed)
 );
 
 #else
@@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->uabi_instance;
 			   __entry->ctx = rq->fence.context;
@@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->flags)
 );
 
@@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_STRUCT__entry(
 			__field(u32, dev)
 			__field(struct i915_gem_context *, ctx)
-			__field(u32, hw_id)
 			__field(struct i915_address_space *, vm)
 	),
 
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->hw_id = ctx->hw_id;
 			__entry->vm = ctx->vm;
 	),
 
-	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
-		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
+	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
+		  __entry->dev, __entry->ctx, __entry->vm)
 )
 
 DEFINE_EVENT(i915_context, i915_context_create,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index cb30c669b1b7..8b31fdf6a6f7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -478,8 +478,8 @@ static int igt_evict_contexts(void *arg)
 			if (IS_ERR(rq)) {
 				/* When full, fail_if_busy will trigger EBUSY */
 				if (PTR_ERR(rq) != -EBUSY) {
-					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
-					       ctx->hw_id, engine->name,
+					pr_err("Unexpected error from request alloc (on %s): %d\n",
+					       engine->name,
 					       (int)PTR_ERR(rq));
 					err = PTR_ERR(rq);
 				}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index a5bec0a4cdcc..73848a72837c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
+		for_each_prime_number(num_ctx, NUM_HW_TAG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 15/18] drm/i915: Move context management under GEM
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (12 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 14/18] drm/i915: Remove logical HW ID Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 16/18] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
                   ` (8 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Keep track of the GEM contexts underneath i915->gem.contexts and assign
them their own lock for the purposes of list management.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 108 +++++++-----------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |   4 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |   2 +-
 drivers/gpu/drm/i915/i915_debugfs.c           |  16 +--
 drivers/gpu/drm/i915/i915_drv.c               |   2 -
 drivers/gpu/drm/i915/i915_drv.h               |  24 ++--
 drivers/gpu/drm/i915/i915_gem.c               |   8 +-
 drivers/gpu/drm/i915/i915_perf.c              |  13 ++-
 drivers/gpu/drm/i915/i915_sysfs.c             |  38 +++---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   4 +-
 10 files changed, 93 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 774a3ac853a8..51b8101e9836 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -218,9 +218,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
+	mutex_lock(&ctx->i915->gem.contexts.mutex);
+	list_del(&ctx->link);
+	mutex_unlock(&ctx->i915->gem.contexts.mutex);
+
 	if (ctx->vm)
 		i915_vm_put(ctx->vm);
 
@@ -233,56 +236,40 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
-	list_del(&ctx->link);
 	mutex_destroy(&ctx->mutex);
 
 	kfree_rcu(ctx, rcu);
 }
 
-static void contexts_free(struct drm_i915_private *i915)
+static void contexts_free_all(struct llist_node *list)
 {
-	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
 	struct i915_gem_context *ctx, *cn;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	llist_for_each_entry_safe(ctx, cn, freed, free_link)
+	llist_for_each_entry_safe(ctx, cn, list, free_link)
 		i915_gem_context_free(ctx);
 }
 
-static void contexts_free_first(struct drm_i915_private *i915)
+static void contexts_flush_free(struct i915_gem_contexts *gc)
 {
-	struct i915_gem_context *ctx;
-	struct llist_node *freed;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	freed = llist_del_first(&i915->contexts.free_list);
-	if (!freed)
-		return;
-
-	ctx = container_of(freed, typeof(*ctx), free_link);
-	i915_gem_context_free(ctx);
+	contexts_free_all(llist_del_all(&gc->free_list));
 }
 
 static void contexts_free_worker(struct work_struct *work)
 {
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), contexts.free_work);
+	struct i915_gem_contexts *gc =
+		container_of(work, typeof(*gc), free_work);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	contexts_free(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	contexts_flush_free(gc);
 }
 
 void i915_gem_context_release(struct kref *ref)
 {
 	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
-	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_gem_contexts *gc = &ctx->i915->gem.contexts;
 
 	trace_i915_context_free(ctx);
-	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
-		queue_work(i915->wq, &i915->contexts.free_work);
+	if (llist_add(&ctx->free_link, &gc->free_list))
+		queue_work(ctx->i915->wq, &gc->free_work);
 }
 
 static void context_close(struct i915_gem_context *ctx)
@@ -316,7 +303,6 @@ __create_context(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ctx->ref);
-	list_add_tail(&ctx->link, &i915->contexts.list);
 	ctx->i915 = i915;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
@@ -342,6 +328,10 @@ __create_context(struct drm_i915_private *i915)
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
+	mutex_lock(&i915->gem.contexts.mutex);
+	list_add_tail(&ctx->link, &i915->gem.contexts.list);
+	mutex_unlock(&i915->gem.contexts.mutex);
+
 	return ctx;
 
 err_free:
@@ -416,27 +406,25 @@ static void __assign_timeline(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
+i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
-	    !HAS_EXECLISTS(dev_priv))
+	    !HAS_EXECLISTS(i915))
 		return ERR_PTR(-EINVAL);
 
-	/* Reap the most stale context */
-	contexts_free_first(dev_priv);
+	/* Reap the stale contexts */
+	contexts_flush_free(&i915->gem.contexts);
 
-	ctx = __create_context(dev_priv);
+	ctx = __create_context(i915);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (HAS_FULL_PPGTT(dev_priv)) {
+	if (HAS_FULL_PPGTT(i915)) {
 		struct i915_ppgtt *ppgtt;
 
-		ppgtt = i915_ppgtt_create(dev_priv);
+		ppgtt = i915_ppgtt_create(i915);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
@@ -451,7 +439,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
 		struct intel_timeline *timeline;
 
-		timeline = intel_timeline_create(&dev_priv->gt, NULL);
+		timeline = intel_timeline_create(&i915->gt, NULL);
 		if (IS_ERR(timeline)) {
 			context_close(ctx);
 			return ERR_CAST(timeline);
@@ -496,45 +484,39 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	return ctx;
 }
 
-static void init_contexts(struct drm_i915_private *i915)
+static void init_contexts(struct i915_gem_contexts *gc)
 {
-	mutex_init(&i915->contexts.mutex);
-	INIT_LIST_HEAD(&i915->contexts.list);
-
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
+	mutex_init(&gc->mutex);
+	INIT_LIST_HEAD(&gc->list);
 
-	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
-	init_llist_head(&i915->contexts.free_list);
+	INIT_WORK(&gc->free_work, contexts_free_worker);
+	init_llist_head(&gc->free_list);
 }
 
-int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
+int i915_gem_init_contexts(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx;
 
 	/* Reassure ourselves we are only called once */
-	GEM_BUG_ON(dev_priv->kernel_context);
+	GEM_BUG_ON(i915->kernel_context);
 
-	init_contexts(dev_priv);
+	init_contexts(&i915->gem.contexts);
 
 	/* lowest priority; idle task */
-	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
+	ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	dev_priv->kernel_context = ctx;
+	i915->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			 DRIVER_CAPS(dev_priv)->has_logical_contexts ?
+			 DRIVER_CAPS(i915)->has_logical_contexts ?
 			 "logical" : "fake");
 	return 0;
 }
 
-void i915_gem_contexts_fini(struct drm_i915_private *i915)
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
@@ -597,9 +579,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init(&file_priv->context_idr);
 	idr_init_base(&file_priv->vm_idr, 1);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto err;
@@ -627,6 +607,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private *i915 = file_priv->dev_priv;
 
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
@@ -635,6 +616,8 @@ void i915_gem_context_close(struct drm_file *file)
 	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
 	idr_destroy(&file_priv->vm_idr);
 	mutex_destroy(&file_priv->vm_idr_lock);
+
+	contexts_flush_free(&i915->gem.contexts);
 }
 
 int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -1975,12 +1958,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 		return -EIO;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	ext_data.ctx = i915_gem_create_context(i915, args->flags);
-	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ext_data.ctx))
 		return PTR_ERR(ext_data.ctx);
 
@@ -2178,7 +2156,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 				       void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_reset_stats *args = data;
 	struct i915_gem_context *ctx;
 	int ret;
@@ -2200,7 +2178,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	 */
 
 	if (capable(CAP_SYS_ADMIN))
-		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+		args->reset_count = i915_reset_count(&i915->gpu_error);
 	else
 		args->reset_count = 0;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 50bc27d30c03..441715f9ee3a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -118,8 +118,8 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 }
 
 /* i915_gem_context.c */
-int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
+int __must_check i915_gem_init_contexts(struct drm_i915_private *i915);
+void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 0104f16b1327..e36af5a5ce42 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -65,7 +65,7 @@ void mock_context_close(struct i915_gem_context *ctx)
 
 void mock_init_contexts(struct drm_i915_private *i915)
 {
-	init_contexts(i915);
+	init_contexts(&i915->gem.contexts);
 }
 
 struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a3393d9fa84e..aed549906410 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -311,7 +311,8 @@ static void print_context_stats(struct seq_file *m,
 	struct file_stats kstats = {};
 	struct i915_gem_context *ctx;
 
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
+	lockdep_assert_held(&i915->gem.contexts.mutex);
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
@@ -363,12 +364,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 
 	seq_putc(m, '\n');
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	ret = mutex_lock_interruptible(&i915->gem.contexts.mutex);
 	if (ret)
 		return ret;
 
 	print_context_stats(m, i915);
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&i915->gem.contexts.mutex);
 
 	return 0;
 }
@@ -1563,16 +1564,15 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 
 static int i915_context_status(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct i915_gem_context *ctx;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&i915->gem.contexts.mutex);
 	if (ret)
 		return ret;
 
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link) {
 		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
@@ -1612,7 +1612,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, '\n');
 	}
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&i915->gem.contexts.mutex);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a7c62bc7950b..8cbf2461615f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1632,10 +1632,8 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	mutex_lock(&dev->struct_mutex);
 	i915_gem_context_close(file);
 	i915_gem_release(dev, file);
-	mutex_unlock(&dev->struct_mutex);
 
 	kfree(file_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 525e6366d46c..9097df986b32 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1590,23 +1590,6 @@ struct drm_i915_private {
 	struct mutex av_mutex;
 	int audio_power_refcount;
 
-	struct {
-		struct mutex mutex;
-		struct list_head list;
-		struct llist_head free_list;
-		struct work_struct free_work;
-
-		/* The hw wants to have a stable context identifier for the
-		 * lifetime of the context (for OA, PASID, faults, etc).
-		 * This is limited in execlists to 21 bits.
-		 */
-		struct ida hw_ida;
-#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
-#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
-		struct list_head hw_id_list;
-	} contexts;
-
 	u32 fdi_rx_config;
 
 	/* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */
@@ -1780,6 +1763,13 @@ struct drm_i915_private {
 		 * off the idle_work.
 		 */
 		struct work_struct idle_work;
+
+		struct i915_gem_contexts {
+			struct mutex mutex;
+			struct list_head list;
+			struct llist_head free_list;
+			struct work_struct free_work;
+		} contexts;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 71ee4c710252..d57e43817a0d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1450,7 +1450,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
-	ret = i915_gem_contexts_init(dev_priv);
+	ret = i915_gem_init_contexts(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
 		goto err_scratch;
@@ -1539,7 +1539,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	}
 err_context:
 	if (ret != -EIO)
-		i915_gem_contexts_fini(dev_priv);
+		i915_gem_driver_release__contexts(dev_priv);
 err_scratch:
 	i915_gem_fini_scratch(dev_priv);
 err_ggtt:
@@ -1616,7 +1616,7 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 {
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_engines_cleanup(dev_priv);
-	i915_gem_contexts_fini(dev_priv);
+	i915_gem_driver_release__contexts(dev_priv);
 	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
@@ -1630,7 +1630,7 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
 
 	i915_gem_drain_freed_objects(dev_priv);
 
-	WARN_ON(!list_empty(&dev_priv->contexts.list));
+	WARN_ON(!list_empty(&dev_priv->gem.contexts.list));
 }
 
 void i915_gem_init_mmio(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3d43cde3ec50..0c6ab7b141bc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1882,7 +1882,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 #undef ctx_flexeuN
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
-	int i;
+	int i, err;
 
 	for (i = 2; i < ARRAY_SIZE(regs); i++)
 		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
@@ -1905,16 +1905,18 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 * context. Contexts idle at the time of reconfiguration are not
 	 * trapped behind the barrier.
 	 */
-	list_for_each_entry(ctx, &i915->contexts.list, link) {
-		int err;
-
+	mutex_lock(&i915->gem.contexts.mutex);
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link) {
 		if (ctx == i915->kernel_context)
 			continue;
 
 		err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
 		if (err)
-			return err;
+			break;
 	}
+	mutex_unlock(&i915->gem.contexts.mutex);
+	if (err)
+		return err;
 
 	/*
 	 * After updating all other contexts, we need to modify ourselves.
@@ -1923,7 +1925,6 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream,
 	 */
 	for_each_uabi_engine(engine, i915) {
 		struct intel_context *ce = engine->kernel_context;
-		int err;
 
 		if (engine->class != RENDER_CLASS)
 			continue;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index d8a3b180c084..b20f0b2b538a 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -142,9 +142,9 @@ static const struct attribute_group media_rc6_attr_group = {
 };
 #endif
 
-static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset)
+static int l3_access_valid(struct drm_i915_private *i915, loff_t offset)
 {
-	if (!HAS_L3_DPF(dev_priv))
+	if (!HAS_L3_DPF(i915))
 		return -EPERM;
 
 	if (offset % 4 != 0)
@@ -162,31 +162,30 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 	     loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	int slice = (int)(uintptr_t)attr->private;
 	int ret;
 
 	count = round_down(count, 4);
 
-	ret = l3_access_valid(dev_priv, offset);
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
 	count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count);
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = mutex_lock_interruptible(&i915->gem.contexts.mutex);
 	if (ret)
 		return ret;
 
-	if (dev_priv->l3_parity.remap_info[slice])
+	if (i915->l3_parity.remap_info[slice])
 		memcpy(buf,
-		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
+		       i915->l3_parity.remap_info[slice] + offset / 4,
 		       count);
 	else
 		memset(buf, 0, count);
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&i915->gem.contexts.mutex);
 
 	return count;
 }
@@ -197,22 +196,23 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	      loff_t offset, size_t count)
 {
 	struct device *kdev = kobj_to_dev(kobj);
-	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	struct drm_device *dev = &dev_priv->drm;
+	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	struct i915_gem_context *ctx;
 	int slice = (int)(uintptr_t)attr->private;
 	u32 **remap_info;
 	int ret;
 
-	ret = l3_access_valid(dev_priv, offset);
+	count = round_down(count, 4);
+
+	ret = l3_access_valid(i915, offset);
 	if (ret)
 		return ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = mutex_lock_interruptible(&i915->gem.contexts.mutex);
 	if (ret)
 		return ret;
 
-	remap_info = &dev_priv->l3_parity.remap_info[slice];
+	remap_info = &i915->l3_parity.remap_info[slice];
 	if (!*remap_info) {
 		*remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
 		if (!*remap_info) {
@@ -221,20 +221,20 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 		}
 	}
 
-	/* TODO: Ideally we really want a GPU reset here to make sure errors
+	/*
+	 * TODO: Ideally we really want a GPU reset here to make sure errors
 	 * aren't propagated. Since I cannot find a stable way to reset the GPU
 	 * at this point it is left as a TODO.
 	*/
 	memcpy(*remap_info + (offset/4), buf, count);
 
 	/* NB: We defer the remapping until we switch to the context */
-	list_for_each_entry(ctx, &dev_priv->contexts.list, link)
-		ctx->remap_slice |= (1<<slice);
+	list_for_each_entry(ctx, &i915->gem.contexts.list, link)
+		ctx->remap_slice |= BIT(slice);
 
 	ret = count;
-
 out:
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&i915->gem.contexts.mutex);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 01a89c071bf5..1ffad8c4b15d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -65,7 +65,7 @@ static void mock_device_release(struct drm_device *dev)
 	mutex_lock(&i915->drm.struct_mutex);
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
-	i915_gem_contexts_fini(i915);
+	i915_gem_driver_release__contexts(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	intel_timelines_fini(i915);
@@ -221,7 +221,7 @@ struct drm_i915_private *mock_gem_device(void)
 	return i915;
 
 err_context:
-	i915_gem_contexts_fini(i915);
+	i915_gem_driver_release__contexts(i915);
 err_engine:
 	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 16/18] drm/i915/pmu: Use GT parked for estimating RC6 while asleep
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (13 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 15/18] drm/i915: Move context management under GEM Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 17/18] drm/i915: Drop GEM context as a direct link from i915_request Chris Wilson
                   ` (7 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.

v2: Stick to using ktime_t
v3: Track user_wakerefs that interfere with the new
intel_gt_pm_wait_for_idle

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105010
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pm.c   |  19 ++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c      |  30 +++---
 drivers/gpu/drm/i915/i915_pmu.c          | 120 +++++++++++------------
 drivers/gpu/drm/i915/i915_pmu.h          |   4 +-
 5 files changed, 96 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 92e53c25424c..e62cf413a832 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -140,6 +140,21 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
 	return switch_to_kernel_context_sync(&i915->gt);
 }
 
+static void user_forcewake(struct intel_gt *gt, bool suspend)
+{
+	int count = atomic_read(&gt->user_wakeref);
+
+	if (likely(!count))
+		return;
+
+	intel_gt_pm_get(gt);
+	if (suspend)
+		atomic_sub(count, &gt->wakeref.count);
+	else
+		atomic_add(count, &gt->wakeref.count);
+	intel_gt_pm_put(gt);
+}
+
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("\n");
@@ -147,6 +162,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
+	user_forcewake(&i915->gt, true);
+
 	mutex_lock(&i915->drm.struct_mutex);
 
 	/*
@@ -261,6 +278,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	if (!i915_gem_load_power_context(i915))
 		goto err_wedged;
 
+	user_forcewake(&i915->gt, false);
+
 out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 8da7b9f1f46e..7db661bf75fa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -49,6 +49,7 @@ struct intel_gt {
 	} timelines;
 
 	struct intel_wakeref wakeref;
+	atomic_t user_wakeref;
 
 	struct list_head closed_vma;
 	spinlock_t closed_lock; /* guards the list of closed_vma */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index aed549906410..a89937150f47 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3655,8 +3655,12 @@ i915_drop_caches_set(void *data, u64 val)
 
 		mutex_unlock(&i915->drm.struct_mutex);
 
-		if (ret == 0 && val & DROP_IDLE)
-			ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		if (ret == 0 && val & DROP_IDLE) {
+			if (atomic_read(&i915->gt.user_wakeref))
+				ret = -EBUSY;
+			else
+				ret = intel_gt_pm_wait_for_idle(&i915->gt);
+		}
 	}
 
 	if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt))
@@ -3984,13 +3988,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	file->private_data =
-		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
-	intel_uncore_forcewake_user_get(&i915->uncore);
+	atomic_inc(&gt->user_wakeref);
+	intel_gt_pm_get(gt);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_get(gt->uncore);
 
 	return 0;
 }
@@ -3998,13 +4001,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 static int i915_forcewake_release(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *i915 = inode->i_private;
+	struct intel_gt *gt = &i915->gt;
 
-	if (INTEL_GEN(i915) < 6)
-		return 0;
-
-	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(&i915->runtime_pm,
-			     (intel_wakeref_t)(uintptr_t)file->private_data);
+	if (INTEL_GEN(i915) >= 6)
+		intel_uncore_forcewake_user_put(&i915->uncore);
+	intel_gt_pm_put(gt);
+	atomic_dec(&gt->user_wakeref);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c7ee0ab180e8..2faefe9e630e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -116,19 +116,51 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 	return enable;
 }
 
+static u64 __get_rc6(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	u64 val;
+
+	val = intel_rc6_residency_ns(i915,
+				     IS_VALLEYVIEW(i915) ?
+				     VLV_GT_RENDER_RC6 :
+				     GEN6_GT_GFX_RC6);
+
+	if (HAS_RC6p(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+	if (HAS_RC6pp(i915))
+		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+	return val;
+}
+
 void i915_pmu_gt_parked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
+	u64 val;
 
 	if (!pmu->base.event_init)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
+	val = 0;
+	if (pmu->sample[__I915_SAMPLE_RC6].cur)
+		val = __get_rc6(&i915->gt);
+
+	if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+		pmu->sample[__I915_SAMPLE_RC6].cur = val;
+	}
+	pmu->sleep_last = ktime_get();
+
 	/*
 	 * Signal sampling timer to stop if only engine events are enabled and
 	 * GPU went idle.
 	 */
 	pmu->timer_enabled = pmu_needs_timer(pmu, false);
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -143,6 +175,11 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
 	}
 }
 
+static inline s64 ktime_since(const ktime_t kt)
+{
+	return ktime_to_ns(ktime_sub(ktime_get(), kt));
+}
+
 void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 {
 	struct i915_pmu *pmu = &i915->pmu;
@@ -151,10 +188,22 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
 		return;
 
 	spin_lock_irq(&pmu->lock);
+
 	/*
 	 * Re-enable sampling timer when GPU goes active.
 	 */
 	__i915_pmu_maybe_start_timer(pmu);
+
+	/* Estimate how long we slept and accumulate that into rc6 counters */
+	if (pmu->sample[__I915_SAMPLE_RC6].cur) {
+		u64 val;
+
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
+
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+	}
+
 	spin_unlock_irq(&pmu->lock);
 }
 
@@ -426,39 +475,18 @@ static int i915_pmu_event_init(struct perf_event *event)
 	return 0;
 }
 
-static u64 __get_rc6(struct intel_gt *gt)
-{
-	struct drm_i915_private *i915 = gt->i915;
-	u64 val;
-
-	val = intel_rc6_residency_ns(i915,
-				     IS_VALLEYVIEW(i915) ?
-				     VLV_GT_RENDER_RC6 :
-				     GEN6_GT_GFX_RC6);
-
-	if (HAS_RC6p(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
-	if (HAS_RC6pp(i915))
-		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
-	return val;
-}
-
 static u64 get_rc6(struct intel_gt *gt)
 {
-#if IS_ENABLED(CONFIG_PM)
 	struct drm_i915_private *i915 = gt->i915;
-	struct intel_runtime_pm *rpm = &i915->runtime_pm;
 	struct i915_pmu *pmu = &i915->pmu;
-	intel_wakeref_t wakeref;
 	unsigned long flags;
 	u64 val;
 
-	wakeref = intel_runtime_pm_get_if_in_use(rpm);
-	if (wakeref) {
+	spin_lock_irqsave(&pmu->lock, flags);
+
+	if (intel_gt_pm_get_if_awake(gt)) {
 		val = __get_rc6(gt);
-		intel_runtime_pm_put(rpm, wakeref);
+		intel_gt_pm_put(gt);
 
 		/*
 		 * If we are coming back from being runtime suspended we must
@@ -466,19 +494,13 @@ static u64 get_rc6(struct intel_gt *gt)
 		 * previously.
 		 */
 
-		spin_lock_irqsave(&pmu->lock, flags);
-
 		if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
 			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
 			pmu->sample[__I915_SAMPLE_RC6].cur = val;
 		} else {
 			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
 		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
 	} else {
-		struct device *kdev = rpm->kdev;
-
 		/*
 		 * We are runtime suspended.
 		 *
@@ -486,42 +508,16 @@ static u64 get_rc6(struct intel_gt *gt)
 		 * on top of the last known real value, as the approximated RC6
 		 * counter value.
 		 */
-		spin_lock_irqsave(&pmu->lock, flags);
 
-		/*
-		 * After the above branch intel_runtime_pm_get_if_in_use failed
-		 * to get the runtime PM reference we cannot assume we are in
-		 * runtime suspend since we can either: a) race with coming out
-		 * of it before we took the power.lock, or b) there are other
-		 * states than suspended which can bring us here.
-		 *
-		 * We need to double-check that we are indeed currently runtime
-		 * suspended and if not we cannot do better than report the last
-		 * known RC6 value.
-		 */
-		if (pm_runtime_status_suspended(kdev)) {
-			val = pm_runtime_suspended_time(kdev);
+		val = ktime_since(pmu->sleep_last);
+		val += pmu->sample[__I915_SAMPLE_RC6].cur;
 
-			if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
-				pmu->suspended_time_last = val;
-
-			val -= pmu->suspended_time_last;
-			val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
-			pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-		} else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
-			val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
-		} else {
-			val = pmu->sample[__I915_SAMPLE_RC6].cur;
-		}
-
-		spin_unlock_irqrestore(&pmu->lock, flags);
+		pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
 	}
 
+	spin_unlock_irqrestore(&pmu->lock, flags);
+
 	return val;
-#else
-	return __get_rc6(gt);
-#endif
 }
 
 static u64 __i915_pmu_event_read(struct perf_event *event)
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..067dbbf3bdff 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
 	 */
 	struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
 	/**
-	 * @suspended_time_last: Cached suspend time from PM core.
+	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */
-	u64 suspended_time_last;
+	ktime_t sleep_last;
 	/**
 	 * @i915_attr: Memory block holding device attributes.
 	 */
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 17/18] drm/i915: Drop GEM context as a direct link from i915_request
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (14 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 16/18] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 13:39 ` [PATCH 18/18] drm/i915: Push the use-semaphore marker onto the intel_context Chris Wilson
                   ` (6 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Keep the intel_context as being the primary state for i915_request, with
the GEM context a backpointer from the low level state for the rarer
cases we need client information. Our goal is to remove such references
to clients from the backend, and leave the HW submission agnostic to
client interfaces and self-contained.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  5 ++-
 drivers/gpu/drm/i915/gem/i915_gem_context.h   | 20 ---------
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  6 +--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 +--
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   |  4 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 +-
 drivers/gpu/drm/i915/gt/intel_context.h       | 24 +++++++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 42 +++++++++----------
 drivers/gpu/drm/i915/gt/intel_reset.c         | 38 +++++++++--------
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    | 12 +++---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 +--
 drivers/gpu/drm/i915/gvt/scheduler.c          | 18 ++++----
 drivers/gpu/drm/i915/i915_gem.c               |  6 +--
 drivers/gpu/drm/i915/i915_gpu_error.c         | 11 +++--
 drivers/gpu/drm/i915/i915_request.c           | 15 ++++---
 drivers/gpu/drm/i915/i915_request.h           |  3 +-
 18 files changed, 117 insertions(+), 107 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 51b8101e9836..1d7e9c32c2bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -69,8 +69,9 @@
 
 #include <drm/i915_drm.h>
 
-#include "gt/intel_lrc_reg.h"
+#include "gt/intel_context.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_lrc_reg.h"
 
 #include "i915_gem_context.h"
 #include "i915_globals.h"
@@ -842,7 +843,7 @@ static void set_ppgtt_barrier(void *data)
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
-	struct i915_address_space *vm = rq->hw_context->vm;
+	struct i915_address_space *vm = rq->context->vm;
 	struct intel_engine_cs *engine = rq->engine;
 	u32 base = engine->mmio_base;
 	u32 *cs;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 441715f9ee3a..bfc0826591c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -74,26 +74,6 @@ static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *c
 	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
 }
 
-static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
-{
-	return test_bit(CONTEXT_BANNED, &ctx->flags);
-}
-
-static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx)
-{
-	set_bit(CONTEXT_BANNED, &ctx->flags);
-}
-
-static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx)
-{
-	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
-}
-
-static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx)
-{
-	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
-}
-
 static inline bool
 i915_gem_context_user_engines(const struct i915_gem_context *ctx)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 87be27877e22..53e1f17ed3fe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -142,10 +142,8 @@ struct i915_gem_context {
 	 * @flags: small set of booleans
 	 */
 	unsigned long flags;
-#define CONTEXT_BANNED			0
-#define CONTEXT_CLOSED			1
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
-#define CONTEXT_USER_ENGINES		3
+#define CONTEXT_CLOSED			0
+#define CONTEXT_USER_ENGINES		1
 
 	struct mutex mutex;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b8432c3437e9..60bda03c53f5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -742,9 +742,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
-		return -EIO;
-
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
@@ -2152,6 +2149,9 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
 	if (err)
 		return err;
 
+	if (unlikely(intel_context_is_banned(ce)))
+		return -EIO;
+
 	/*
 	 * Pinning the contexts may generate requests in order to acquire
 	 * GGTT space, so do this first before we reserve a seqno for
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 90db41d173df..a59d2cafc89b 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -300,7 +300,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 
 	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
 		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-		struct intel_context *ce = rq->hw_context;
+		struct intel_context *ce = rq->context;
 		struct list_head *pos;
 
 		spin_lock(&b->irq_lock);
@@ -356,7 +356,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 	 */
 	spin_lock(&b->irq_lock);
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-		struct intel_context *ce = rq->hw_context;
+		struct intel_context *ce = rq->context;
 
 		list_del(&rq->signal_link);
 		if (list_empty(&ce->signals))
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index f55691d151ae..40e61184f24f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -296,7 +296,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	int err;
 
 	/* Only suitable for use in remotely modifying this context */
-	GEM_BUG_ON(rq->hw_context == ce);
+	GEM_BUG_ON(rq->context == ce);
 
 	if (rq->timeline != tl) { /* beware timeline sharing */
 		err = mutex_lock_interruptible_nested(&tl->mutex,
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index dd742ac2fbdb..fe03b1eeab63 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -7,7 +7,9 @@
 #ifndef __INTEL_CONTEXT_H__
 #define __INTEL_CONTEXT_H__
 
+#include <linux/bitops.h>
 #include <linux/lockdep.h>
+#include <linux/types.h>
 
 #include "i915_active.h"
 #include "intel_context_types.h"
@@ -152,4 +154,26 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
 	return u64_to_ptr(struct intel_ring, sz);
 }
 
+static inline bool intel_context_is_banned(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline void intel_context_set_banned(struct intel_context *ce)
+{
+	set_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool
+intel_context_force_single_submission(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline void
+intel_context_set_single_submission(struct intel_context *ce)
+{
+	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a3578ef24079..9c8c6dcd0f07 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -54,7 +54,9 @@ struct intel_context {
 	struct intel_timeline *timeline;
 
 	unsigned long flags;
-#define CONTEXT_ALLOC_BIT 0
+#define CONTEXT_ALLOC_BIT		0
+#define CONTEXT_BANNED			1
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8a9d3cd2c31c..971c6a10b648 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1458,7 +1458,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (!intel_context_inflight_count(rq->hw_context))
+			if (!intel_context_inflight_count(rq->context))
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 947bf994d586..e96dd049a2ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -477,7 +477,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		 * engine so that it can be moved across onto another physical
 		 * engine as load dictates.
 		 */
-		owner = rq->hw_context->engine;
+		owner = rq->context->engine;
 		if (likely(owner == engine)) {
 			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 			if (rq_prio(rq) != prio) {
@@ -538,7 +538,7 @@ static inline struct intel_engine_cs *
 __execlists_schedule_in(struct i915_request *rq)
 {
 	struct intel_engine_cs * const engine = rq->engine;
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 
 	intel_context_get(ce);
 
@@ -561,7 +561,7 @@ __execlists_schedule_in(struct i915_request *rq)
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 	struct intel_engine_cs *old;
 
 	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
@@ -592,7 +592,7 @@ static inline void
 __execlists_schedule_out(struct i915_request *rq)
 {
 	struct intel_engine_cs * const engine = rq->engine;
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -616,7 +616,7 @@ __execlists_schedule_out(struct i915_request *rq)
 static inline void
 execlists_schedule_out(struct i915_request *rq)
 {
-	struct intel_context * const ce = rq->hw_context;
+	struct intel_context * const ce = rq->context;
 	struct intel_engine_cs *cur, *old;
 
 	trace_i915_request_out(rq);
@@ -634,7 +634,7 @@ execlists_schedule_out(struct i915_request *rq)
 
 static u64 execlists_update_context(const struct i915_request *rq)
 {
-	struct intel_context *ce = rq->hw_context;
+	struct intel_context *ce = rq->context;
 	u64 desc;
 
 	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
@@ -709,10 +709,10 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 		return false;
 
 	for (port = execlists->pending; (rq = *port); port++) {
-		if (ce == rq->hw_context)
+		if (ce == rq->context)
 			return false;
 
-		ce = rq->hw_context;
+		ce = rq->context;
 		if (i915_request_completed(rq))
 			continue;
 
@@ -765,7 +765,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 static bool ctx_single_port_submission(const struct intel_context *ce)
 {
 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-		i915_gem_context_force_single_submission(ce->gem_context));
+		intel_context_force_single_submission(ce));
 }
 
 static bool can_merge_ctx(const struct intel_context *prev,
@@ -786,7 +786,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
-	if (!can_merge_ctx(prev->hw_context, next->hw_context))
+	if (!can_merge_ctx(prev->context, next->context))
 		return false;
 
 	return true;
@@ -1080,7 +1080,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * tendency to ignore us rewinding the TAIL to the
 			 * end of an earlier request.
 			 */
-			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+			last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
 			   !timer_pending(&engine->execlists.timer)) {
@@ -1152,7 +1152,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq != ve->request);
 		GEM_BUG_ON(rq->engine != &ve->base);
-		GEM_BUG_ON(rq->hw_context != &ve->context);
+		GEM_BUG_ON(rq->context != &ve->context);
 
 		if (rq_prio(rq) >= queue_prio(execlists)) {
 			if (!virtual_matches(ve, rq, engine)) {
@@ -1271,7 +1271,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * same LRCA, i.e. we must submit 2 different
 				 * contexts if we submit 2 ELSP.
 				 */
-				if (last->hw_context == rq->hw_context)
+				if (last->context == rq->context)
 					goto done;
 
 				/*
@@ -1281,8 +1281,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * the same context (even though a different
 				 * request) to the second port.
 				 */
-				if (ctx_single_port_submission(last->hw_context) ||
-				    ctx_single_port_submission(rq->hw_context))
+				if (ctx_single_port_submission(last->context) ||
+				    ctx_single_port_submission(rq->context))
 					goto done;
 
 				*port = execlists_schedule_in(last, port - execlists->pending);
@@ -1754,7 +1754,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 static int emit_pdps(struct i915_request *rq)
 {
 	const struct intel_engine_cs * const engine = rq->engine;
-	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
+	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
 	int err, i;
 	u32 *cs;
 
@@ -1809,7 +1809,7 @@ static int execlists_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+	GEM_BUG_ON(!intel_context_is_pinned(request->context));
 
 	/*
 	 * Flush enough space to reduce the likelihood of waiting after
@@ -1827,7 +1827,7 @@ static int execlists_request_alloc(struct i915_request *request)
 	 */
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	if (i915_vm_is_4lvl(request->hw_context->vm))
+	if (i915_vm_is_4lvl(request->context->vm))
 		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	else
 		ret = emit_pdps(request);
@@ -2279,14 +2279,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 static struct i915_request *active_request(struct i915_request *rq)
 {
 	const struct list_head * const list = &rq->timeline->requests;
-	const struct intel_context * const ce = rq->hw_context;
+	const struct intel_context * const ce = rq->context;
 	struct i915_request *active = NULL;
 
 	list_for_each_entry_from_reverse(rq, list, link) {
 		if (i915_request_completed(rq))
 			break;
 
-		if (rq->hw_context != ce)
+		if (rq->context != ce)
 			break;
 
 		active = rq;
@@ -2316,7 +2316,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!rq)
 		goto unwind;
 
-	ce = rq->hw_context;
+	ce = rq->context;
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
 	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
 	rq = active_request(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 077716442c90..629bb0309469 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -40,28 +40,32 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
 static void engine_skip_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_gem_context *hung_ctx = rq->gem_context;
+	struct intel_context *hung_ctx = rq->context;
 
 	lockdep_assert_held(&engine->active.lock);
 
 	if (!i915_request_is_active(rq))
-		return;
+		rq = list_first_entry(&engine->active.requests,
+				      typeof(*rq), sched.link);
 
 	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
-		if (rq->gem_context == hung_ctx)
+		if (rq->context == hung_ctx)
 			i915_request_skip(rq, -EIO);
 }
 
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
-			       const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_request *rq, bool banned)
 {
-	unsigned int score;
+	struct i915_gem_context *ctx = rq->context->gem_context;
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
 	unsigned long prev_hang;
+	unsigned int score;
 
-	if (i915_gem_context_is_banned(ctx))
+	if (IS_ERR_OR_NULL(file_priv))
+		return;
+
+	score = 0;
+	if (banned)
 		score = I915_CLIENT_SCORE_CONTEXT_BAN;
-	else
-		score = 0;
 
 	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
 	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
@@ -76,8 +80,9 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
 	}
 }
 
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
 {
+	struct i915_gem_context *ctx = rq->context->gem_context;
 	unsigned long prev_hang;
 	bool banned;
 	int i;
@@ -101,18 +106,17 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
 	if (banned) {
 		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
 				 ctx->name, atomic_read(&ctx->guilty_count));
-		i915_gem_context_set_banned(ctx);
+		intel_context_set_banned(rq->context);
 	}
 
-	if (!IS_ERR_OR_NULL(ctx->file_priv))
-		client_mark_guilty(ctx->file_priv, ctx);
+	client_mark_guilty(rq, banned);
 
 	return banned;
 }
 
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
 {
-	atomic_inc(&ctx->active_count);
+	atomic_inc(&rq->context->gem_context->active_count);
 }
 
 void __i915_request_reset(struct i915_request *rq, bool guilty)
@@ -128,11 +132,11 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
 
 	if (guilty) {
 		i915_request_skip(rq, -EIO);
-		if (context_mark_guilty(rq->gem_context))
+		if (mark_guilty(rq))
 			engine_skip_context(rq);
 	} else {
 		dma_fence_set_error(&rq->fence, -EAGAIN);
-		context_mark_innocent(rq->gem_context);
+		mark_innocent(rq);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 8ce5a55427c1..5635dca94f06 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1649,7 +1649,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
+	*cs++ = i915_ggtt_offset(rq->context->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1719,7 +1719,7 @@ static int remap_l3_slice(struct i915_request *rq, int slice)
 
 static int remap_l3(struct i915_request *rq)
 {
-	struct i915_gem_context *ctx = rq->gem_context;
+	struct i915_gem_context *ctx = rq->context->gem_context;
 	int i, err;
 
 	if (!ctx->remap_slice)
@@ -1741,7 +1741,7 @@ static int remap_l3(struct i915_request *rq)
 static int switch_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	struct i915_address_space *vm = vm_alias(rq->hw_context);
+	struct i915_address_space *vm = vm_alias(rq->context);
 	unsigned int unwind_mm = 0;
 	u32 hw_flags = 0;
 	int ret;
@@ -1778,7 +1778,7 @@ static int switch_context(struct i915_request *rq)
 		}
 	}
 
-	if (rq->hw_context->state) {
+	if (rq->context->state) {
 		GEM_BUG_ON(engine->id != RCS0);
 
 		/*
@@ -1788,7 +1788,7 @@ static int switch_context(struct i915_request *rq)
 		 * as nothing actually executes using the kernel context; it
 		 * is purely used for flushing user contexts.
 		 */
-		if (i915_gem_context_is_kernel(rq->gem_context))
+		if (i915_gem_context_is_kernel(rq->context->gem_context))
 			hw_flags = MI_RESTORE_INHIBIT;
 
 		ret = mi_set_context(rq, hw_flags);
@@ -1839,7 +1839,7 @@ static int ring_request_alloc(struct i915_request *request)
 {
 	int ret;
 
-	GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+	GEM_BUG_ON(!intel_context_is_pinned(request->context));
 	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a37afc6266ec..2fe57151fa9e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -464,7 +464,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
 	struct intel_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
+	u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
 	guc_wq_item_append(client, engine->guc_id, ctx_desc,
@@ -555,7 +555,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
-			if (last && rq->hw_context != last->hw_context) {
+			if (last && rq->context != last->context) {
 				if (port == last_port)
 					goto done;
 
@@ -658,7 +658,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 		stalled = false;
 
 	__i915_request_reset(rq, stalled);
-	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
+	intel_lr_context_reset(engine, rq->context, rq->head, stalled);
 
 out_unlock:
 	spin_unlock_irqrestore(&engine->active.lock, flags);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1a28e3666951..3bf637302829 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -58,7 +58,7 @@ static void set_context_pdp_root_pointer(
 static void update_shadow_pdps(struct intel_vgpu_workload *workload)
 {
 	struct drm_i915_gem_object *ctx_obj =
-		workload->req->hw_context->state->obj;
+		workload->req->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 
@@ -129,7 +129,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	struct intel_gvt *gvt = vgpu->gvt;
 	int ring_id = workload->ring_id;
 	struct drm_i915_gem_object *ctx_obj =
-		workload->req->hw_context->state->obj;
+		workload->req->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *dst;
@@ -204,9 +204,9 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	return 0;
 }
 
-static inline bool is_gvt_request(struct i915_request *req)
+static inline bool is_gvt_request(struct i915_request *rq)
 {
-	return i915_gem_context_force_single_submission(req->gem_context);
+	return intel_context_force_single_submission(rq->context);
 }
 
 static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
@@ -306,7 +306,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
 	u32 *cs;
 	int err;
 
-	if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
+	if (IS_GEN(req->i915, 9) && is_inhibit_context(req->context))
 		intel_vgpu_restore_inhibit_context(vgpu, req);
 
 	/*
@@ -529,7 +529,7 @@ static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx);
 	struct i915_request *rq = workload->req;
 	struct execlist_ring_context *shadow_ring_context =
-		(struct execlist_ring_context *)rq->hw_context->lrc_reg_state;
+		(struct execlist_ring_context *)rq->context->lrc_reg_state;
 
 	shadow_ring_context->bb_per_ctx_ptr.val =
 		(shadow_ring_context->bb_per_ctx_ptr.val &
@@ -786,7 +786,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	struct i915_request *rq = workload->req;
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj;
+	struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *src;
@@ -1227,8 +1227,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		goto out_unlock;
 	}
 
-	i915_gem_context_set_force_single_submission(ctx);
-
 	i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
 
 	for_each_engine(engine, i915, i) {
@@ -1243,6 +1241,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 			goto out_shadow_ctx;
 		}
 
+		intel_context_set_single_submission(ce);
+
 		if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */
 			const unsigned int ring_size = 512 * SZ_4K;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d57e43817a0d..6ed02cc0de36 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1313,9 +1313,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			continue;
 
 		/* We want to be able to unbind the state from the GGTT */
-		GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
+		GEM_BUG_ON(intel_context_is_pinned(rq->context));
 
-		state = rq->hw_context->state;
+		state = rq->context->state;
 		if (!state)
 			continue;
 
@@ -1367,7 +1367,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		if (!rq)
 			continue;
 
-		ce = rq->hw_context;
+		ce = rq->context;
 		i915_request_put(rq);
 		intel_context_put(ce);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3b5bfe50e00a..ee9ebd6c1573 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1169,7 +1169,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 static void record_request(const struct i915_request *request,
 			   struct drm_i915_error_request *erq)
 {
-	const struct i915_gem_context *ctx = request->gem_context;
+	const struct i915_gem_context *ctx = request->context->gem_context;
 
 	erq->flags = request->fence.flags;
 	erq->context = request->fence.context;
@@ -1181,7 +1181,7 @@ static void record_request(const struct i915_request *request,
 	erq->tail = request->tail;
 
 	rcu_read_lock();
-	erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
+	erq->pid = ctx && ctx->pid ? pid_nr(ctx->pid) : 0;
 	rcu_read_unlock();
 }
 
@@ -1249,7 +1249,10 @@ static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 static bool record_context(struct drm_i915_error_context *e,
 			   const struct i915_request *rq)
 {
-	const struct i915_gem_context *ctx = rq->gem_context;
+	const struct i915_gem_context *ctx = rq->context->gem_context;
+
+	if (!ctx)
+		return false;
 
 	if (ctx->pid) {
 		struct task_struct *task;
@@ -1403,7 +1406,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 		capture = request_record_user_bo(request, ee, capture);
 
 		capture = capture_vma(capture,
-				      request->hw_context->state,
+				      request->context->state,
 				      &ee->ctx);
 
 		capture = capture_vma(capture,
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7170ccb3c677..c6ac6b912595 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -72,7 +72,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return "signaled";
 
-	return to_request(fence)->gem_context->name ?: "[i915]";
+	return to_request(fence)->context->gem_context->name ?: "[i915]";
 }
 
 static bool i915_fence_signaled(struct dma_fence *fence)
@@ -285,8 +285,8 @@ static bool i915_request_retire(struct i915_request *rq)
 	i915_request_remove_from_client(rq);
 	list_del(&rq->link);
 
-	intel_context_exit(rq->hw_context);
-	intel_context_unpin(rq->hw_context);
+	intel_context_exit(rq->context);
+	intel_context_unpin(rq->context);
 
 	free_capture_list(rq);
 	i915_sched_node_fini(&rq->sched);
@@ -370,7 +370,7 @@ void __i915_request_submit(struct i915_request *request)
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->active.lock);
 
-	if (i915_gem_context_is_banned(request->gem_context))
+	if (intel_context_is_banned(request->context))
 		i915_request_skip(request, -EIO);
 
 	/*
@@ -633,8 +633,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 		goto err_free;
 
 	rq->i915 = ce->engine->i915;
-	rq->hw_context = ce;
-	rq->gem_context = ce->gem_context;
+	rq->context = ce;
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
@@ -853,7 +852,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 						       &from->submit,
 						       I915_FENCE_GFP);
 	} else if (intel_engine_has_semaphores(to->engine) &&
-		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
+		   to->context->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
 		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
 	} else {
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
@@ -1178,7 +1177,7 @@ void __i915_request_queue(struct i915_request *rq,
 
 void i915_request_add(struct i915_request *rq)
 {
-	struct i915_sched_attr attr = rq->gem_context->sched;
+	struct i915_sched_attr attr = rq->context->gem_context->sched;
 	struct intel_timeline * const tl = rq->timeline;
 	struct i915_request *prev;
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8ac6e1226a56..fd41abbfc75b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -109,9 +109,8 @@ struct i915_request {
 	 * i915_request_free() will then decrement the refcount on the
 	 * context.
 	 */
-	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
-	struct intel_context *hw_context;
+	struct intel_context *context;
 	struct intel_ring *ring;
 	struct intel_timeline *timeline;
 	struct list_head signal_link;
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 18/18] drm/i915: Push the use-semaphore marker onto the intel_context
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (15 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 17/18] drm/i915: Drop GEM context as a direct link from i915_request Chris Wilson
@ 2019-08-12 13:39 ` Chris Wilson
  2019-08-12 14:11 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests Patchwork
                   ` (5 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 13:39 UTC (permalink / raw)
  To: intel-gfx

Instead of rummaging through the intel_context to peek at the GEM
context in the middle of request submission to decide whether to use
semaphores, store that information on the intel_context itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 52 +++++++++++++------
 drivers/gpu/drm/i915/gt/intel_context.c       |  3 ++
 drivers/gpu/drm/i915/gt/intel_context.h       | 15 ++++++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  5 +-
 drivers/gpu/drm/i915/i915_request.c           |  8 ++-
 5 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1d7e9c32c2bb..188935b8063f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1603,6 +1603,40 @@ get_engines(struct i915_gem_context *ctx,
 	return err;
 }
 
+static void __apply_priority(struct intel_context *ce, void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+
+	if (intel_context_use_semaphores(ce) &&
+	    ctx->sched.priority < I915_PRIORITY_NORMAL)
+		intel_context_clear_use_semaphores(ce);
+}
+
+static int set_priority(struct i915_gem_context *ctx,
+			const struct drm_i915_gem_context_param *args)
+{
+	s64 priority = args->value;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+		return -ENODEV;
+
+	if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+	    priority < I915_CONTEXT_MIN_USER_PRIORITY)
+		return -EINVAL;
+
+	if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+	    !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	ctx->sched.priority = I915_USER_PRIORITY(priority);
+	context_apply_all(ctx, __apply_priority, NULL);
+
+	return 0;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1649,23 +1683,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		break;
 
 	case I915_CONTEXT_PARAM_PRIORITY:
-		{
-			s64 priority = args->value;
-
-			if (args->size)
-				ret = -EINVAL;
-			else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
-				ret = -ENODEV;
-			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
-				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
-				ret = -EINVAL;
-			else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
-				 !capable(CAP_SYS_NICE))
-				ret = -EPERM;
-			else
-				ctx->sched.priority =
-					I915_USER_PRIORITY(priority);
-		}
+		ret = set_priority(ctx, args);
 		break;
 
 	case I915_CONTEXT_PARAM_SSEU:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 40e61184f24f..bde5d0917903 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -227,6 +227,9 @@ intel_context_init(struct intel_context *ce,
 	ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
 	if (ctx->timeline)
 		ce->timeline = intel_timeline_get(ctx->timeline);
+	if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+	    intel_engine_has_semaphores(engine))
+		__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
 
 	ce->engine = engine;
 	ce->ops = engine->cops;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index fe03b1eeab63..7fdca6fe7a70 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -154,6 +154,21 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
 	return u64_to_ptr(struct intel_ring, sz);
 }
 
+static inline bool intel_context_use_semaphores(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_set_use_semaphores(struct intel_context *ce)
+{
+	set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_clear_use_semaphores(struct intel_context *ce)
+{
+	clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
 static inline bool intel_context_is_banned(const struct intel_context *ce)
 {
 	return test_bit(CONTEXT_BANNED, &ce->flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 9c8c6dcd0f07..278680833146 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,8 +55,9 @@ struct intel_context {
 
 	unsigned long flags;
 #define CONTEXT_ALLOC_BIT		0
-#define CONTEXT_BANNED			1
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+#define CONTEXT_USE_SEMAPHORES		1
+#define CONTEXT_BANNED			2
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	3
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c6ac6b912595..f70f188607da 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -847,18 +847,16 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 			return ret;
 	}
 
-	if (to->engine == from->engine) {
+	if (to->engine == from->engine)
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
 						       I915_FENCE_GFP);
-	} else if (intel_engine_has_semaphores(to->engine) &&
-		   to->context->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
+	else if (intel_context_use_semaphores(to->context))
 		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-	} else {
+	else
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
 						    I915_FENCE_GFP);
-	}
 	if (ret < 0)
 		return ret;
 
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (16 preceding siblings ...)
  2019-08-12 13:39 ` [PATCH 18/18] drm/i915: Push the use-semaphore marker onto the intel_context Chris Wilson
@ 2019-08-12 14:11 ` Patchwork
  2019-08-12 14:21 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (4 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2019-08-12 14:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
URL   : https://patchwork.freedesktop.org/series/65089/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3d6da5baf737 drm/i915/guc: Use a local cancel_port_requests
1f0621c12a25 drm/i915: Push the wakeref->count deferral to the backend
51e08f66cc0f drm/i915/gt: Save/restore interrupts around breadcrumb disable
88b0e6e02044 drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
c21f3d612d80 drm/i915/gt: Track timeline activeness in enter/exit
05aa0e74a899 drm/i915/gt: Convert timeline tracking to spinlock
93e83bb04cbe drm/i915/gt: Guard timeline pinning with its own mutex
0fa69645057d drm/i915: Protect request retirement with timeline->mutex
6c8d9b314a1a drm/i915/gt: Mark context->active_count as protected by timeline->mutex
a55ae8c560fb drm/i915: Forgo last_fence active request tracking
11cdeb0ac83c drm/i915/overlay: Switch to using i915_active tracking
5efada370bbb drm/i915: Extract intel_frontbuffer active tracking
01e460adcc41 drm/i915: Markup expected timeline locks for i915_active
-:290: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#290: FILE: drivers/gpu/drm/i915/i915_active_types.h:28:
+	struct mutex *lock;

total: 0 errors, 0 warnings, 1 checks, 242 lines checked
65858a67e53d drm/i915: Remove logical HW ID
c30b4b8baf8c drm/i915: Move context management under GEM
-:411: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#411: FILE: drivers/gpu/drm/i915/i915_drv.h:1768:
+			struct mutex mutex;

total: 0 errors, 0 warnings, 1 checks, 547 lines checked
06be1ed317e9 drm/i915/pmu: Use GT parked for estimating RC6 while asleep
755f401dfa30 drm/i915: Drop GEM context as a direct link from i915_request
cf70cd608fef drm/i915: Push the use-semaphore marker onto the intel_context

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (17 preceding siblings ...)
  2019-08-12 14:11 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests Patchwork
@ 2019-08-12 14:21 ` Patchwork
  2019-08-12 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (3 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2019-08-12 14:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
URL   : https://patchwork.freedesktop.org/series/65089/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/guc: Use a local cancel_port_requests
Okay!

Commit: drm/i915: Push the wakeref->count deferral to the backend
Okay!

Commit: drm/i915/gt: Save/restore interrupts around breadcrumb disable
Okay!

Commit: drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
+drivers/gpu/drm/i915/gt/intel_lrc.c:983:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gt/intel_lrc.c:983:24: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
-./drivers/gpu/drm/i915/i915_utils.h:262:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
+./drivers/gpu/drm/i915/i915_utils.h:260:16: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_utils.h:262:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_utils.h:260:16: warning: expression using sizeof(void)

Commit: drm/i915/gt: Track timeline activeness in enter/exit
Okay!

Commit: drm/i915/gt: Convert timeline tracking to spinlock
Okay!

Commit: drm/i915/gt: Guard timeline pinning with its own mutex
Okay!

Commit: drm/i915: Protect request retirement with timeline->mutex
Okay!

Commit: drm/i915/gt: Mark context->active_count as protected by timeline->mutex
Okay!

Commit: drm/i915: Forgo last_fence active request tracking
Okay!

Commit: drm/i915/overlay: Switch to using i915_active tracking
Okay!

Commit: drm/i915: Extract intel_frontbuffer active tracking
Okay!

Commit: drm/i915: Markup expected timeline locks for i915_active
Okay!

Commit: drm/i915: Remove logical HW ID
Okay!

Commit: drm/i915: Move context management under GEM
+drivers/gpu/drm/i915/i915_sysfs.c:175:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_sysfs.c:175:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_sysfs.c:176:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_sysfs.c:176:17: warning: expression using sizeof(void)

Commit: drm/i915/pmu: Use GT parked for estimating RC6 while asleep
Okay!

Commit: drm/i915: Drop GEM context as a direct link from i915_request
Okay!

Commit: drm/i915: Push the use-semaphore marker onto the intel_context
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (18 preceding siblings ...)
  2019-08-12 14:21 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-08-12 14:31 ` Patchwork
  2019-08-12 20:23 ` [PATCH 01/18] " Daniele Ceraolo Spurio
                   ` (2 subsequent siblings)
  22 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2019-08-12 14:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
URL   : https://patchwork.freedesktop.org/series/65089/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6686 -> Patchwork_13986
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/

Known issues
------------

  Here are the changes found in Patchwork_13986 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_reloc@basic-write-read:
    - fi-icl-u3:          [PASS][1] -> [DMESG-WARN][2] ([fdo#107724]) +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-icl-u3/igt@gem_exec_reloc@basic-write-read.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-icl-u3/igt@gem_exec_reloc@basic-write-read.html

  * igt@i915_module_load@reload:
    - fi-blb-e6850:       [PASS][3] -> [INCOMPLETE][4] ([fdo#107718])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-blb-e6850/igt@i915_module_load@reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-blb-e6850/igt@i915_module_load@reload.html

  * igt@i915_selftest@live_gtt:
    - fi-glk-dsi:         [PASS][5] -> [DMESG-WARN][6] ([fdo#111377])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-glk-dsi/igt@i915_selftest@live_gtt.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-glk-dsi/igt@i915_selftest@live_gtt.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-cml-u2:          [PASS][7] -> [FAIL][8] ([fdo#110627])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-cml-u2/igt@kms_chamelium@dp-crc-fast.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-cml-u2/igt@kms_chamelium@dp-crc-fast.html

  * igt@prime_vgem@basic-fence-flip:
    - fi-kbl-7500u:       [PASS][9] -> [SKIP][10] ([fdo#109271]) +23 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-kbl-7500u/igt@prime_vgem@basic-fence-flip.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-kbl-7500u/igt@prime_vgem@basic-fence-flip.html

  
#### Possible fixes ####

  * igt@gem_exec_reloc@basic-gtt-read:
    - fi-icl-u3:          [DMESG-WARN][11] ([fdo#107724]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-icl-u3/igt@gem_exec_reloc@basic-gtt-read.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-icl-u3/igt@gem_exec_reloc@basic-gtt-read.html

  * igt@i915_selftest@live_execlists:
    - fi-skl-gvtdvm:      [DMESG-FAIL][13] ([fdo#111108]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html

  * igt@kms_busy@basic-flip-c:
    - fi-kbl-7500u:       [SKIP][15] ([fdo#109271] / [fdo#109278]) -> [PASS][16] +2 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/fi-kbl-7500u/igt@kms_busy@basic-flip-c.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/fi-kbl-7500u/igt@kms_busy@basic-flip-c.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#110627]: https://bugs.freedesktop.org/show_bug.cgi?id=110627
  [fdo#111108]: https://bugs.freedesktop.org/show_bug.cgi?id=111108
  [fdo#111377]: https://bugs.freedesktop.org/show_bug.cgi?id=111377


Participating hosts (55 -> 46)
------------------------------

  Missing    (9): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-j1900 fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6686 -> Patchwork_13986

  CI-20190529: 20190529
  CI_DRM_6686: 3f5325e706a026d1e35e82642a6b93d374235b1b @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5127: f43f5fa12ac1b93febfe3eeb9e9985f5f3e2eff0 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13986: cf70cd608fef60d64d5a0499174e813f2695e88d @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

cf70cd608fef drm/i915: Push the use-semaphore marker onto the intel_context
755f401dfa30 drm/i915: Drop GEM context as a direct link from i915_request
06be1ed317e9 drm/i915/pmu: Use GT parked for estimating RC6 while asleep
c30b4b8baf8c drm/i915: Move context management under GEM
65858a67e53d drm/i915: Remove logical HW ID
01e460adcc41 drm/i915: Markup expected timeline locks for i915_active
5efada370bbb drm/i915: Extract intel_frontbuffer active tracking
11cdeb0ac83c drm/i915/overlay: Switch to using i915_active tracking
a55ae8c560fb drm/i915: Forgo last_fence active request tracking
6c8d9b314a1a drm/i915/gt: Mark context->active_count as protected by timeline->mutex
0fa69645057d drm/i915: Protect request retirement with timeline->mutex
93e83bb04cbe drm/i915/gt: Guard timeline pinning with its own mutex
05aa0e74a899 drm/i915/gt: Convert timeline tracking to spinlock
c21f3d612d80 drm/i915/gt: Track timeline activeness in enter/exit
88b0e6e02044 drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
51e08f66cc0f drm/i915/gt: Save/restore interrupts around breadcrumb disable
1f0621c12a25 drm/i915: Push the wakeref->count deferral to the backend
3d6da5baf737 drm/i915/guc: Use a local cancel_port_requests

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking
  2019-08-12 13:39 ` [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
@ 2019-08-12 15:38   ` Matthew Auld
  0 siblings, 0 replies; 27+ messages in thread
From: Matthew Auld @ 2019-08-12 15:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development

On Mon, 12 Aug 2019 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Remove the raw i915_active_request tracking in favour of the higher
> level i915_active tracking for the sole purpose of making the lockless
> transition easier in later patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (19 preceding siblings ...)
  2019-08-12 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-08-12 20:23 ` Daniele Ceraolo Spurio
  2019-08-12 20:36   ` [PATCH] " Chris Wilson
  2019-08-19 10:06   ` [PATCH 01/18] " Janusz Krzysztofik
  2019-08-12 20:59 ` ✗ Fi.CI.IGT: failure for series starting with [01/18] " Patchwork
  2019-08-12 22:00 ` ✗ Fi.CI.BAT: failure for series starting with drm/i915/guc: Use a local cancel_port_requests (rev2) Patchwork
  22 siblings, 2 replies; 27+ messages in thread
From: Daniele Ceraolo Spurio @ 2019-08-12 20:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx



On 8/12/19 6:38 AM, Chris Wilson wrote:
> Since execlista and the guc have diverged in their port tracking, we
> cannot simply reuse the execlists cancellation code as it leads to
> unbalanced reference counting. Use a local simpler routine for the guc.
> 

LGTM, just wondering if we should put a comment somewhere to note that 
ce->inflight and execlists->pending are currently unused in the GuC 
path, so we don't incorrectly assume they're always set at certain 
stages of the submission. But anyway:

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Janusz, does this work for you?

Thanks,
Daniele

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h        |  3 ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  6 ++---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 +++++++++++--------
>   3 files changed, 16 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index e1228b0e577f..4b6a1cf80706 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -136,9 +136,6 @@ execlists_active(const struct intel_engine_execlists *execlists)
>   	return READ_ONCE(*execlists->active);
>   }
>   
> -void
> -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
> -
>   struct i915_request *
>   execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index b97047d58d3d..5c26c4ae139b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1297,8 +1297,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	}
>   }
>   
> -void
> -execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
> +static void
> +cancel_port_requests(struct intel_engine_execlists * const execlists)
>   {
>   	struct i915_request * const *port, *rq;
>   
> @@ -2355,7 +2355,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
>   
>   unwind:
>   	/* Push back any incomplete requests for replay after the reset. */
> -	execlists_cancel_port_requests(execlists);
> +	cancel_port_requests(execlists);
>   	__unwind_incomplete_requests(engine);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 449ca6357018..a37afc6266ec 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -517,11 +517,7 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
>   {
>   	trace_i915_request_in(rq, idx);
>   
> -	if (!rq->hw_context->inflight)
> -		rq->hw_context->inflight = rq->engine;
> -	intel_context_inflight_inc(rq->hw_context);
>   	intel_gt_pm_get(rq->engine->gt);
> -
>   	return i915_request_get(rq);
>   }
>   
> @@ -529,10 +525,6 @@ static void schedule_out(struct i915_request *rq)
>   {
>   	trace_i915_request_out(rq);
>   
> -	intel_context_inflight_dec(rq->hw_context);
> -	if (!intel_context_inflight_count(rq->hw_context))
> -		rq->hw_context->inflight = NULL;
> -
>   	intel_gt_pm_put(rq->engine->gt);
>   	i915_request_put(rq);
>   }
> @@ -636,6 +628,17 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
>   	__tasklet_disable_sync_once(&execlists->tasklet);
>   }
>   
> +static void
> +cancel_port_requests(struct intel_engine_execlists * const execlists)
> +{
> +	struct i915_request * const *port, *rq;
> +
> +	for (port = execlists->active; (rq = *port); port++)
> +		schedule_out(rq);
> +	execlists->active =
> +		memset(execlists->inflight, 0, sizeof(execlists->inflight));
> +}
> +
>   static void guc_reset(struct intel_engine_cs *engine, bool stalled)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -644,7 +647,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
>   
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
> -	execlists_cancel_port_requests(execlists);
> +	cancel_port_requests(execlists);
>   
>   	/* Push back any incomplete requests for replay after the reset. */
>   	rq = execlists_unwind_incomplete_requests(execlists);
> @@ -687,7 +690,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
>   	/* Cancel the requests on the HW and clear the ELSP tracker. */
> -	execlists_cancel_port_requests(execlists);
> +	cancel_port_requests(execlists);
>   
>   	/* Mark all executing requests as skipped. */
>   	list_for_each_entry(rq, &engine->active.requests, sched.link) {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 20:23 ` [PATCH 01/18] " Daniele Ceraolo Spurio
@ 2019-08-12 20:36   ` Chris Wilson
  2019-08-19 10:06   ` [PATCH 01/18] " Janusz Krzysztofik
  1 sibling, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2019-08-12 20:36 UTC (permalink / raw)
  To: intel-gfx

Since execlists and the guc have diverged in their port tracking, we
cannot simply reuse the execlists cancellation code as it leads to
unbalanced reference counting. Use a local, simpler routine for the guc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
Add reminders that we only use the execlists->inflight queue currently.
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 --
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  6 +--
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 37 ++++++++++++++-----
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index e1228b0e577f..4b6a1cf80706 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -136,9 +136,6 @@ execlists_active(const struct intel_engine_execlists *execlists)
 	return READ_ONCE(*execlists->active);
 }
 
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
-
 struct i915_request *
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b97047d58d3d..5c26c4ae139b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1297,8 +1297,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	}
 }
 
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
 	struct i915_request * const *port, *rq;
 
@@ -2355,7 +2355,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 
 unwind:
 	/* Push back any incomplete requests for replay after the reset. */
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 	__unwind_incomplete_requests(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 449ca6357018..26918261d60d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -517,11 +517,14 @@ static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
 	trace_i915_request_in(rq, idx);
 
-	if (!rq->hw_context->inflight)
-		rq->hw_context->inflight = rq->engine;
-	intel_context_inflight_inc(rq->hw_context);
-	intel_gt_pm_get(rq->engine->gt);
+	/*
+	 * Currently we are not tracking the rq->context being inflight
+	 * (ce->inflight = rq->engine). It is only used by the execlists
+	 * backend at the moment, a similar counting strategy would be
+	 * required if we generalise the inflight tracking.
+	 */
 
+	intel_gt_pm_get(rq->engine->gt);
 	return i915_request_get(rq);
 }
 
@@ -529,10 +532,6 @@ static void schedule_out(struct i915_request *rq)
 {
 	trace_i915_request_out(rq);
 
-	intel_context_inflight_dec(rq->hw_context);
-	if (!intel_context_inflight_count(rq->hw_context))
-		rq->hw_context->inflight = NULL;
-
 	intel_gt_pm_put(rq->engine->gt);
 	i915_request_put(rq);
 }
@@ -556,6 +555,11 @@ static void __guc_dequeue(struct intel_engine_cs *engine)
 		last = NULL;
 	}
 
+	/*
+	 * We write directly into the execlists->inflight queue and don't use
+	 * the execlists->pending queue, as we don't have a distinct switch
+	 * event.
+	 */
 	port = first;
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
@@ -636,6 +640,19 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
 	__tasklet_disable_sync_once(&execlists->tasklet);
 }
 
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
+{
+	struct i915_request * const *port, *rq;
+
+	/* Note we are only using the inflight and not the pending queue */
+
+	for (port = execlists->active; (rq = *port); port++)
+		schedule_out(rq);
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
+}
+
 static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -644,7 +661,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
 	rq = execlists_unwind_incomplete_requests(execlists);
@@ -687,7 +704,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
-	execlists_cancel_port_requests(execlists);
+	cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (20 preceding siblings ...)
  2019-08-12 20:23 ` [PATCH 01/18] " Daniele Ceraolo Spurio
@ 2019-08-12 20:59 ` Patchwork
  2019-08-12 22:00 ` ✗ Fi.CI.BAT: failure for series starting with drm/i915/guc: Use a local cancel_port_requests (rev2) Patchwork
  22 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2019-08-12 20:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests
URL   : https://patchwork.freedesktop.org/series/65089/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6686_full -> Patchwork_13986_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_13986_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_13986_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_13986_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_shared@q-out-order-bsd1:
    - shard-skl:          [PASS][1] -> [INCOMPLETE][2] +29 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-skl5/igt@gem_ctx_shared@q-out-order-bsd1.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-skl10/igt@gem_ctx_shared@q-out-order-bsd1.html

  * igt@gem_ctx_shared@q-smoketest-blt:
    - shard-skl:          [PASS][3] -> [DMESG-FAIL][4] +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-skl6/igt@gem_ctx_shared@q-smoketest-blt.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-skl5/igt@gem_ctx_shared@q-smoketest-blt.html
    - shard-iclb:         NOTRUN -> [DMESG-FAIL][5]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb5/igt@gem_ctx_shared@q-smoketest-blt.html

  * igt@gem_ctx_shared@q-smoketest-bsd1:
    - shard-apl:          [PASS][6] -> [DMESG-FAIL][7] +2 similar issues
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-apl1/igt@gem_ctx_shared@q-smoketest-bsd1.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@gem_ctx_shared@q-smoketest-bsd1.html
    - shard-glk:          [PASS][8] -> [DMESG-FAIL][9] +2 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-glk8/igt@gem_ctx_shared@q-smoketest-bsd1.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-glk7/igt@gem_ctx_shared@q-smoketest-bsd1.html
    - shard-iclb:         [PASS][10] -> [DMESG-FAIL][11] +1 similar issue
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb4/igt@gem_ctx_shared@q-smoketest-bsd1.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb6/igt@gem_ctx_shared@q-smoketest-bsd1.html

  * igt@gem_exec_schedule@preempt-contexts-vebox:
    - shard-skl:          NOTRUN -> [INCOMPLETE][12]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-skl6/igt@gem_exec_schedule@preempt-contexts-vebox.html

  * igt@gem_exec_schedule@smoketest-vebox:
    - shard-kbl:          [PASS][13] -> [DMESG-FAIL][14] +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-kbl1/igt@gem_exec_schedule@smoketest-vebox.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@gem_exec_schedule@smoketest-vebox.html

  * igt@runner@aborted:
    - shard-kbl:          NOTRUN -> ([FAIL][15], [FAIL][16], [FAIL][17], [FAIL][18], [FAIL][19], [FAIL][20], [FAIL][21], [FAIL][22], [FAIL][23], [FAIL][24], [FAIL][25], [FAIL][26], [FAIL][27], [FAIL][28], [FAIL][29], [FAIL][30], [FAIL][31], [FAIL][32], [FAIL][33], [FAIL][34], [FAIL][35], [FAIL][36], [FAIL][37], [FAIL][38], [FAIL][39], [FAIL][40], [FAIL][41], [FAIL][42], [FAIL][43], [FAIL][44], [FAIL][45], [FAIL][46], [FAIL][47], [FAIL][48])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl2/igt@runner@aborted.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl6/igt@runner@aborted.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl2/igt@runner@aborted.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl3/igt@runner@aborted.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl6/igt@runner@aborted.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl1/igt@runner@aborted.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl3/igt@runner@aborted.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl4/igt@runner@aborted.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl3/igt@runner@aborted.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl4/igt@runner@aborted.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl4/igt@runner@aborted.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl1/igt@runner@aborted.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl6/igt@runner@aborted.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl3/igt@runner@aborted.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl3/igt@runner@aborted.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl4/igt@runner@aborted.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl1/igt@runner@aborted.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl2/igt@runner@aborted.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl2/igt@runner@aborted.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl6/igt@runner@aborted.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl1/igt@runner@aborted.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl1/igt@runner@aborted.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl1/igt@runner@aborted.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl6/igt@runner@aborted.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl4/igt@runner@aborted.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl2/igt@runner@aborted.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl7/igt@runner@aborted.html
    - shard-apl:          NOTRUN -> ([FAIL][49], [FAIL][50], [FAIL][51], [FAIL][52], [FAIL][53], [FAIL][54], [FAIL][55], [FAIL][56], [FAIL][57], [FAIL][58], [FAIL][59], [FAIL][60], [FAIL][61], [FAIL][62], [FAIL][63], [FAIL][64], [FAIL][65], [FAIL][66], [FAIL][67], [FAIL][68], [FAIL][69], [FAIL][70], [FAIL][71], [FAIL][72], [FAIL][73], [FAIL][74], [FAIL][75], [FAIL][76], [FAIL][77], [FAIL][78], [FAIL][79], [FAIL][80], [FAIL][81], [FAIL][82])
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@runner@aborted.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl1/igt@runner@aborted.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl3/igt@runner@aborted.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl2/igt@runner@aborted.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl7/igt@runner@aborted.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl2/igt@runner@aborted.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl6/igt@runner@aborted.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl6/igt@runner@aborted.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl6/igt@runner@aborted.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl2/igt@runner@aborted.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl5/igt@runner@aborted.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl6/igt@runner@aborted.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@runner@aborted.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl3/igt@runner@aborted.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl1/igt@runner@aborted.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@runner@aborted.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl4/igt@runner@aborted.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@runner@aborted.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl4/igt@runner@aborted.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl7/igt@runner@aborted.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@runner@aborted.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl1/igt@runner@aborted.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl3/igt@runner@aborted.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl5/igt@runner@aborted.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl2/igt@runner@aborted.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl6/igt@runner@aborted.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl3/igt@runner@aborted.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl7/igt@runner@aborted.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl3/igt@runner@aborted.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl1/igt@runner@aborted.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl1/igt@runner@aborted.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl7/igt@runner@aborted.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl7/igt@runner@aborted.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl3/igt@runner@aborted.html

  

### Piglit changes ###

#### Possible regressions ####

  * security@initialized-texmemory (NEW):
    - pig-glk-j5005:      NOTRUN -> [INCOMPLETE][83] +4 similar issues
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/pig-glk-j5005/security@initialized-texmemory.html

  * shaders@activeprogram-bad-program (NEW):
    - pig-skl-6260u:      NOTRUN -> [INCOMPLETE][84] +4 similar issues
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/pig-skl-6260u/shaders@activeprogram-bad-program.html

  
New tests
---------

  New tests have been introduced between CI_DRM_6686_full and Patchwork_13986_full:

### New Piglit tests (5) ###

  * security@initialized-texmemory:
    - Statuses : 2 incomplete(s)
    - Exec time: [0.0] s

  * shaders@activeprogram-bad-program:
    - Statuses : 2 incomplete(s)
    - Exec time: [0.0] s

  * shaders@activeprogram-get:
    - Statuses : 2 incomplete(s)
    - Exec time: [0.0] s

  * shaders@attribute0:
    - Statuses : 2 incomplete(s)
    - Exec time: [0.0] s

  * shaders@createshaderprogram-bad-type:
    - Statuses : 2 incomplete(s)
    - Exec time: [0.0] s

  

Known issues
------------

  Here are the changes found in Patchwork_13986_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_shared@q-in-order-default:
    - shard-iclb:         [PASS][85] -> [INCOMPLETE][86] ([fdo#107713]) +27 similar issues
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb5/igt@gem_ctx_shared@q-in-order-default.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb3/igt@gem_ctx_shared@q-in-order-default.html

  * igt@gem_ctx_shared@q-in-order-vebox:
    - shard-glk:          [PASS][87] -> [INCOMPLETE][88] ([fdo#103359] / [k.org#198133]) +31 similar issues
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-glk3/igt@gem_ctx_shared@q-in-order-vebox.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-glk2/igt@gem_ctx_shared@q-in-order-vebox.html

  * igt@gem_ctx_shared@q-out-order-bsd1:
    - shard-apl:          [PASS][89] -> [INCOMPLETE][90] ([fdo#103927]) +30 similar issues
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-apl6/igt@gem_ctx_shared@q-out-order-bsd1.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl7/igt@gem_ctx_shared@q-out-order-bsd1.html

  * igt@gem_ctx_shared@q-out-order-render:
    - shard-kbl:          [PASS][91] -> [INCOMPLETE][92] ([fdo#103665]) +31 similar issues
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-kbl7/igt@gem_ctx_shared@q-out-order-render.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-kbl4/igt@gem_ctx_shared@q-out-order-render.html

  * igt@gem_exec_schedule@preempt-hang-blt:
    - shard-iclb:         [PASS][93] -> [INCOMPLETE][94] ([fdo#107713] / [fdo#109100])
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb7/igt@gem_exec_schedule@preempt-hang-blt.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb3/igt@gem_exec_schedule@preempt-hang-blt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw:
    - shard-iclb:         [PASS][95] -> [FAIL][96] ([fdo#103167])
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-apl:          [PASS][97] -> [DMESG-WARN][98] ([fdo#108566])
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-apl1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl8/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane_cursor@pipe-b-overlay-size-128:
    - shard-snb:          [PASS][99] -> [SKIP][100] ([fdo#109271])
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-snb1/igt@kms_plane_cursor@pipe-b-overlay-size-128.html
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-snb2/igt@kms_plane_cursor@pipe-b-overlay-size-128.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-iclb:         [PASS][101] -> [FAIL][102] ([fdo#103166])
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb1/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb8/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][103] -> [SKIP][104] ([fdo#109441])
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb7/igt@kms_psr@psr2_cursor_render.html

  * igt@prime_busy@hang-bsd2:
    - shard-iclb:         [PASS][105] -> [SKIP][106] ([fdo#109276]) +9 similar issues
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb1/igt@prime_busy@hang-bsd2.html
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb8/igt@prime_busy@hang-bsd2.html

  
#### Possible fixes ####

  * igt@gem_softpin@noreloc-s3:
    - shard-skl:          [INCOMPLETE][107] ([fdo#104108]) -> [PASS][108]
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-skl7/igt@gem_softpin@noreloc-s3.html
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-skl6/igt@gem_softpin@noreloc-s3.html

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size:
    - shard-skl:          [FAIL][109] ([fdo#102670] / [fdo#106081]) -> [PASS][110]
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-skl6/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-skl4/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions-varying-size.html

  * igt@kms_flip@wf_vblank-ts-check-interruptible:
    - shard-hsw:          [INCOMPLETE][111] ([fdo#103540]) -> [PASS][112]
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-hsw2/igt@kms_flip@wf_vblank-ts-check-interruptible.html
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-hsw5/igt@kms_flip@wf_vblank-ts-check-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-farfromfence:
    - shard-apl:          [INCOMPLETE][113] ([fdo#103927]) -> [PASS][114]
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-apl4/igt@kms_frontbuffer_tracking@fbc-farfromfence.html
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-apl2/igt@kms_frontbuffer_tracking@fbc-farfromfence.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
    - shard-skl:          [FAIL][115] ([fdo#108145]) -> [PASS][116]
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-skl9/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-skl2/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html

  * igt@kms_vblank@pipe-a-wait-idle-hang:
    - shard-iclb:         [INCOMPLETE][117] ([fdo#107713]) -> [PASS][118]
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb7/igt@kms_vblank@pipe-a-wait-idle-hang.html
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb5/igt@kms_vblank@pipe-a-wait-idle-hang.html

  * igt@perf_pmu@rc6-runtime-pm-long:
    - shard-hsw:          [FAIL][119] ([fdo#105010]) -> [PASS][120]
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-hsw6/igt@perf_pmu@rc6-runtime-pm-long.html
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-hsw6/igt@perf_pmu@rc6-runtime-pm-long.html

  
#### Warnings ####

  * igt@gem_exec_schedule@preempt-contexts-bsd2:
    - shard-iclb:         [SKIP][121] ([fdo#109276]) -> [INCOMPLETE][122] ([fdo#107713])
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6686/shard-iclb6/igt@gem_exec_schedule@preempt-contexts-bsd2.html
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/shard-iclb1/igt@gem_exec_schedule@preempt-contexts-bsd2.html

  
  [fdo#102670]: https://bugs.freedesktop.org/show_bug.cgi?id=102670
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#106081]: https://bugs.freedesktop.org/show_bug.cgi?id=106081
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fd

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13986/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with drm/i915/guc: Use a local cancel_port_requests (rev2)
  2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
                   ` (21 preceding siblings ...)
  2019-08-12 20:59 ` ✗ Fi.CI.IGT: failure for series starting with [01/18] " Patchwork
@ 2019-08-12 22:00 ` Patchwork
  22 siblings, 0 replies; 27+ messages in thread
From: Patchwork @ 2019-08-12 22:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with drm/i915/guc: Use a local cancel_port_requests (rev2)
URL   : https://patchwork.freedesktop.org/series/65089/
State : failure

== Summary ==

Applying: drm/i915/guc: Use a local cancel_port_requests
Applying: drm/i915: Push the wakeref->count deferral to the backend
Applying: drm/i915/gt: Save/restore interrupts around breadcrumb disable
Applying: drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
Applying: drm/i915/gt: Track timeline activeness in enter/exit
Applying: drm/i915/gt: Convert timeline tracking to spinlock
Applying: drm/i915/gt: Guard timeline pinning with its own mutex
Applying: drm/i915: Protect request retirement with timeline->mutex
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/gt/intel_gt.c
M	drivers/gpu/drm/i915/gt/intel_gt_types.h
M	drivers/gpu/drm/i915/gt/intel_ringbuffer.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/gt/intel_ringbuffer.c
Auto-merging drivers/gpu/drm/i915/gt/intel_gt_types.h
Auto-merging drivers/gpu/drm/i915/gt/intel_gt.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gt/intel_gt.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0008 drm/i915: Protect request retirement with timeline->mutex
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests
  2019-08-12 20:23 ` [PATCH 01/18] " Daniele Ceraolo Spurio
  2019-08-12 20:36   ` [PATCH] " Chris Wilson
@ 2019-08-19 10:06   ` Janusz Krzysztofik
  1 sibling, 0 replies; 27+ messages in thread
From: Janusz Krzysztofik @ 2019-08-19 10:06 UTC (permalink / raw)
  To: Daniele Ceraolo Spurio; +Cc: intel-gfx

On Monday, August 12, 2019 10:23:29 PM CEST Daniele Ceraolo Spurio wrote:
> 
> On 8/12/19 6:38 AM, Chris Wilson wrote:
> > Since execlista and the guc have diverged in their port tracking, we
> > cannot simply reuse the execlists cancellation code as it leads to
> > unbalanced reference counting. Use a local simpler routine for the guc.
> > 
> 
> LGTM, just wondering if we should put a comment somewhere to note that 
> ce->inflight and execlists->pending are currently unused in the GuC 
> path, so we don't incorrectly assume they're always set at certain 
> stages of the submission. But anyway:
> 
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> 
> Janusz, does this work for you?

Yes, since the patch contains my original fix for the unbalanced reference, 
it still works for me (resolves kernel panics observed).

Regarding other modifications included, I haven't had time to spend on that 
yet so I don't feel competent to talk about that.  I don't know how to test, I 
can only confirm I haven't noticed any unexpected behavior.

Thanks,
Janusz

PS. Sorry for late answer, I had no internet access last few days

> 
> Thanks,
> Daniele
> 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine.h        |  3 ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c           |  6 ++---
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 +++++++++++--------
> >   3 files changed, 16 insertions(+), 16 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/
i915/gt/intel_engine.h
> > index e1228b0e577f..4b6a1cf80706 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > @@ -136,9 +136,6 @@ execlists_active(const struct intel_engine_execlists 
*execlists)
> >   	return READ_ONCE(*execlists->active);
> >   }
> >   
> > -void
> > -execlists_cancel_port_requests(struct intel_engine_execlists * const 
execlists);
> > -
> >   struct i915_request *
> >   execlists_unwind_incomplete_requests(struct intel_engine_execlists 
*execlists);
> >   
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/
gt/intel_lrc.c
> > index b97047d58d3d..5c26c4ae139b 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -1297,8 +1297,8 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
> >   	}
> >   }
> >   
> > -void
> > -execlists_cancel_port_requests(struct intel_engine_execlists * const 
execlists)
> > +static void
> > +cancel_port_requests(struct intel_engine_execlists * const execlists)
> >   {
> >   	struct i915_request * const *port, *rq;
> >   
> > @@ -2355,7 +2355,7 @@ static void __execlists_reset(struct intel_engine_cs 
*engine, bool stalled)
> >   
> >   unwind:
> >   	/* Push back any incomplete requests for replay after the reset. 
*/
> > -	execlists_cancel_port_requests(execlists);
> > +	cancel_port_requests(execlists);
> >   	__unwind_incomplete_requests(engine);
> >   }
> >   
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/
gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 449ca6357018..a37afc6266ec 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -517,11 +517,7 @@ static struct i915_request *schedule_in(struct 
i915_request *rq, int idx)
> >   {
> >   	trace_i915_request_in(rq, idx);
> >   
> > -	if (!rq->hw_context->inflight)
> > -		rq->hw_context->inflight = rq->engine;
> > -	intel_context_inflight_inc(rq->hw_context);
> >   	intel_gt_pm_get(rq->engine->gt);
> > -
> >   	return i915_request_get(rq);
> >   }
> >   
> > @@ -529,10 +525,6 @@ static void schedule_out(struct i915_request *rq)
> >   {
> >   	trace_i915_request_out(rq);
> >   
> > -	intel_context_inflight_dec(rq->hw_context);
> > -	if (!intel_context_inflight_count(rq->hw_context))
> > -		rq->hw_context->inflight = NULL;
> > -
> >   	intel_gt_pm_put(rq->engine->gt);
> >   	i915_request_put(rq);
> >   }
> > @@ -636,6 +628,17 @@ static void guc_reset_prepare(struct intel_engine_cs 
*engine)
> >   	__tasklet_disable_sync_once(&execlists->tasklet);
> >   }
> >   
> > +static void
> > +cancel_port_requests(struct intel_engine_execlists * const execlists)
> > +{
> > +	struct i915_request * const *port, *rq;
> > +
> > +	for (port = execlists->active; (rq = *port); port++)
> > +		schedule_out(rq);
> > +	execlists->active =
> > +		memset(execlists->inflight, 0, sizeof(execlists-
>inflight));
> > +}
> > +
> >   static void guc_reset(struct intel_engine_cs *engine, bool stalled)
> >   {
> >   	struct intel_engine_execlists * const execlists = &engine-
>execlists;
> > @@ -644,7 +647,7 @@ static void guc_reset(struct intel_engine_cs *engine, 
bool stalled)
> >   
> >   	spin_lock_irqsave(&engine->active.lock, flags);
> >   
> > -	execlists_cancel_port_requests(execlists);
> > +	cancel_port_requests(execlists);
> >   
> >   	/* Push back any incomplete requests for replay after the reset. 
*/
> >   	rq = execlists_unwind_incomplete_requests(execlists);
> > @@ -687,7 +690,7 @@ static void guc_cancel_requests(struct intel_engine_cs 
*engine)
> >   	spin_lock_irqsave(&engine->active.lock, flags);
> >   
> >   	/* Cancel the requests on the HW and clear the ELSP tracker. */
> > -	execlists_cancel_port_requests(execlists);
> > +	cancel_port_requests(execlists);
> >   
> >   	/* Mark all executing requests as skipped. */
> >   	list_for_each_entry(rq, &engine->active.requests, sched.link) {
> > 
> 




_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2019-08-19 10:06 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-12 13:38 [PATCH 01/18] drm/i915/guc: Use a local cancel_port_requests Chris Wilson
2019-08-12 13:38 ` [PATCH 02/18] drm/i915: Push the wakeref->count deferral to the backend Chris Wilson
2019-08-12 13:39 ` [PATCH 03/18] drm/i915/gt: Save/restore interrupts around breadcrumb disable Chris Wilson
2019-08-12 13:39 ` [PATCH 04/18] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
2019-08-12 13:39 ` [PATCH 05/18] drm/i915/gt: Track timeline activeness in enter/exit Chris Wilson
2019-08-12 13:39 ` [PATCH 06/18] drm/i915/gt: Convert timeline tracking to spinlock Chris Wilson
2019-08-12 13:39 ` [PATCH 07/18] drm/i915/gt: Guard timeline pinning with its own mutex Chris Wilson
2019-08-12 13:39 ` [PATCH 08/18] drm/i915: Protect request retirement with timeline->mutex Chris Wilson
2019-08-12 13:39 ` [PATCH 09/18] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
2019-08-12 13:39 ` [PATCH 10/18] drm/i915: Forgo last_fence active request tracking Chris Wilson
2019-08-12 13:39 ` [PATCH 11/18] drm/i915/overlay: Switch to using i915_active tracking Chris Wilson
2019-08-12 15:38   ` Matthew Auld
2019-08-12 13:39 ` [PATCH 12/18] drm/i915: Extract intel_frontbuffer active tracking Chris Wilson
2019-08-12 13:39 ` [PATCH 13/18] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
2019-08-12 13:39 ` [PATCH 14/18] drm/i915: Remove logical HW ID Chris Wilson
2019-08-12 13:39 ` [PATCH 15/18] drm/i915: Move context management under GEM Chris Wilson
2019-08-12 13:39 ` [PATCH 16/18] drm/i915/pmu: Use GT parked for estimating RC6 while asleep Chris Wilson
2019-08-12 13:39 ` [PATCH 17/18] drm/i915: Drop GEM context as a direct link from i915_request Chris Wilson
2019-08-12 13:39 ` [PATCH 18/18] drm/i915: Push the use-semaphore marker onto the intel_context Chris Wilson
2019-08-12 14:11 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/18] drm/i915/guc: Use a local cancel_port_requests Patchwork
2019-08-12 14:21 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-08-12 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
2019-08-12 20:23 ` [PATCH 01/18] " Daniele Ceraolo Spurio
2019-08-12 20:36   ` [PATCH] " Chris Wilson
2019-08-19 10:06   ` [PATCH 01/18] " Janusz Krzysztofik
2019-08-12 20:59 ` ✗ Fi.CI.IGT: failure for series starting with [01/18] " Patchwork
2019-08-12 22:00 ` ✗ Fi.CI.BAT: failure for series starting with drm/i915/guc: Use a local cancel_port_requests (rev2) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.