All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
@ 2019-08-16  9:24 Chris Wilson
  2019-08-16  9:24 ` [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
                   ` (7 more replies)
  0 siblings, 8 replies; 15+ messages in thread
From: Chris Wilson @ 2019-08-16  9:24 UTC (permalink / raw)
  To: intel-gfx

If we only call process_csb() from the tasklet, we lose the ability to
bypass ksoftirqd interrupt processing on direct submission paths, but in
exchange we can push it out of the irq-off spinlock.

The penalty is that we then allow schedule_out to be called concurrently
with schedule_in requiring us to handle the usage count (baked into the
pointer itself) atomically.

As we do kick the tasklets (via local_bh_enable()) after our submission,
there is a possibility there to see if we can pull the local softirq
processing back from the ksoftirqd.

v2: Store the 'switch_priority_hint' on submission, so that we can
safely check during process_csb().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  10 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 136 +++++++++++-------
 drivers/gpu/drm/i915/i915_utils.h             |  20 ++-
 5 files changed, 108 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a632b20ec4d8..d8ce266c049f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -41,9 +41,7 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
-#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
-#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
+#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context *gem_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 957f27a2ec97..ba457c1c7dc0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1459,7 +1459,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (intel_context_inflight_count(rq->hw_context) == 1)
+			if (!intel_context_inflight_count(rq->hw_context))
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9965a32601d6..5441aa9cb863 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -204,6 +204,16 @@ struct intel_engine_execlists {
 	 */
 	unsigned int port_mask;
 
+	/**
+	 * @switch_priority_hint: Second context priority.
+	 *
+	 * We submit multiple contexts to the HW simultaneously and would
+	 * like to occasionally switch between them to emulate timeslicing.
+	 * To know when timeslicing is suitable, we track the priority of
+	 * the context submitted second.
+	 */
+	int switch_priority_hint;
+
 	/**
 	 * @queue_priority_hint: Highest pending priority.
 	 *
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e9863f4d826b..8cb8c5303f42 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
+
+	intel_context_get(ce);
+
+	intel_gt_pm_get(engine->gt);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(engine);
+
+	return engine;
+}
+
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context *ce = rq->hw_context;
-	int count;
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *old;
 
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 	trace_i915_request_in(rq, idx);
 
-	count = intel_context_inflight_count(ce);
-	if (!count) {
-		intel_context_get(ce);
-		ce->inflight = rq->engine;
-
-		intel_gt_pm_get(ce->inflight->gt);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-		intel_engine_context_in(ce->inflight);
-	}
+	old = READ_ONCE(ce->inflight);
+	do {
+		if (!old) {
+			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			break;
+		}
+	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 
-	intel_context_inflight_inc(ce);
 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
 	return i915_request_get(rq);
 }
 
@@ -581,35 +593,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 }
 
 static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq,
+			 struct intel_engine_cs * const engine)
 {
-	struct intel_context *ce = rq->hw_context;
+	struct intel_context * const ce = rq->hw_context;
 
-	GEM_BUG_ON(!intel_context_inflight_count(ce));
+	intel_engine_context_out(engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	intel_gt_pm_put(engine->gt);
 
-	trace_i915_request_out(rq);
+	/*
+	 * If this is part of a virtual engine, its next request may
+	 * have been blocked waiting for access to the active context.
+	 * We have to kick all the siblings again in case we need to
+	 * switch (e.g. the next request is not runnable on this
+	 * engine). Hopefully, we will already have submitted the next
+	 * request before the tasklet runs and do not need to rebuild
+	 * each virtual tree and kick everyone again.
+	 */
+	if (ce->engine != engine)
+		kick_siblings(rq, ce);
 
-	intel_context_inflight_dec(ce);
-	if (!intel_context_inflight_count(ce)) {
-		intel_engine_context_out(ce->inflight);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-		intel_gt_pm_put(ce->inflight->gt);
+	intel_context_put(ce);
+}
 
-		/*
-		 * If this is part of a virtual engine, its next request may
-		 * have been blocked waiting for access to the active context.
-		 * We have to kick all the siblings again in case we need to
-		 * switch (e.g. the next request is not runnable on this
-		 * engine). Hopefully, we will already have submitted the next
-		 * request before the tasklet runs and do not need to rebuild
-		 * each virtual tree and kick everyone again.
-		 */
-		ce->inflight = NULL;
-		if (rq->engine != ce->engine)
-			kick_siblings(rq, ce);
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *cur, *old;
 
-		intel_context_put(ce);
-	}
+	trace_i915_request_out(rq);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+	old = READ_ONCE(ce->inflight);
+	do
+		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+	while (!try_cmpxchg(&ce->inflight, &old, cur));
+	if (!cur)
+		__execlists_schedule_out(rq, old);
 
 	i915_request_put(rq);
 }
@@ -684,6 +706,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
+	if (!execlists->pending[0])
+		return false;
+
 	if (execlists->pending[execlists_num_ports(execlists)])
 		return false;
 
@@ -942,11 +967,23 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 }
 
 static bool
-enable_timeslice(struct intel_engine_cs *engine)
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+	if (list_is_last(&rq->sched.link, &engine->active.requests))
+		return INT_MIN;
+
+	return rq_prio(list_next_entry(rq, sched.link));
+}
+
+static bool
+enable_timeslice(const struct intel_engine_execlists *execlists)
 {
-	struct i915_request *last = last_active(&engine->execlists);
+	const struct i915_request *rq = *execlists->active;
 
-	return last && need_timeslice(engine, last);
+	if (i915_request_completed(rq))
+		return false;
+
+	return execlists->switch_priority_hint >= effective_prio(rq);
 }
 
 static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1292,6 +1329,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		*port = execlists_schedule_in(last, port - execlists->pending);
 		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
+		execlists->switch_priority_hint =
+			switch_prio(engine, *execlists->pending);
 	} else {
 		ring_set_paused(engine, 0);
 	}
@@ -1356,7 +1395,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->active.lock);
 	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
@@ -1427,15 +1465,14 @@ static void process_csb(struct intel_engine_cs *engine)
 				       execlists->pending,
 				       execlists_num_ports(execlists) *
 				       sizeof(*execlists->pending));
-			execlists->pending[0] = NULL;
 
-			trace_ports(execlists, "promoted", execlists->active);
-
-			if (enable_timeslice(engine))
+			if (enable_timeslice(execlists))
 				mod_timer(&execlists->timer, jiffies + 1);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
+
+			WRITE_ONCE(execlists->pending[0], NULL);
 			break;
 
 		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1479,8 +1516,6 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-
-	process_csb(engine);
 	if (!engine->execlists.pending[0])
 		execlists_dequeue(engine);
 }
@@ -1494,9 +1529,12 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->active.lock, flags);
-	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->active.lock, flags);
+	process_csb(engine);
+	if (!READ_ONCE(engine->execlists.pending[0])) {
+		spin_lock_irqsave(&engine->active.lock, flags);
+		__execlists_submission_tasklet(engine);
+		spin_unlock_irqrestore(&engine->active.lock, flags);
+	}
 }
 
 static void execlists_submission_timer(struct timer_list *timer)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d652ba5d2320..562f756da421 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
-#define ptr_count_dec(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(--__v);					\
-} while (0)
-
-#define ptr_count_inc(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(++__v);					\
-} while (0)
+#define ptr_dec(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v - 1);						\
+})
+
+#define ptr_inc(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v + 1);						\
+})
 
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
@ 2019-08-16  9:24 ` Chris Wilson
  2019-08-16 11:48   ` Mika Kuoppala
  2019-08-16  9:24 ` [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-08-16  9:24 UTC (permalink / raw)
  To: intel-gfx

We use timeline->mutex to protect modifications to
context->active_count, and the associated enable/disable callbacks.
Due to complications with the engine-pm barrier there is a path where we
used a "superlock" to provide serialised protection and so could not
unconditionally assert with lockdep that it was always held. However,
we can mark the mutex as taken (noting that we may be nested underneath
ourselves) which means we can be reassured the right timeline->mutex is
always treated as held and let lockdep roam free.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.h       |  3 +++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 14 ++++++++++++++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  4 ++++
 drivers/gpu/drm/i915/i915_request.c           |  3 ++-
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 053a1307ecb4..dd742ac2fbdb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -89,17 +89,20 @@ void intel_context_exit_engine(struct intel_context *ce);
 
 static inline void intel_context_enter(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->timeline->mutex);
 	if (!ce->active_count++)
 		ce->ops->enter(ce);
 }
 
 static inline void intel_context_mark_active(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->timeline->mutex);
 	++ce->active_count;
 }
 
 static inline void intel_context_exit(struct intel_context *ce)
 {
+	lockdep_assert_held(&ce->timeline->mutex);
 	GEM_BUG_ON(!ce->active_count);
 	if (!--ce->active_count)
 		ce->ops->exit(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d8ce266c049f..bf9cedfccbf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -59,7 +59,7 @@ struct intel_context {
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
 
-	unsigned int active_count; /* notionally protected by timeline->mutex */
+	unsigned int active_count; /* protected by timeline->mutex */
 
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index f3f0109f9e22..5f03f7dcad72 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -37,6 +37,16 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	return 0;
 }
 
+static inline void __timeline_mark_lock(struct intel_context *ce)
+{
+	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+}
+
+static inline void __timeline_mark_unlock(struct intel_context *ce)
+{
+	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+}
+
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
@@ -61,6 +71,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	 * retiring the last request, thus all rings should be empty and
 	 * all timelines idle.
 	 */
+	__timeline_mark_lock(engine->kernel_context);
+
 	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		/* Context switch failed, hope for the best! Maybe reset? */
@@ -80,6 +92,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	__intel_wakeref_defer_park(&engine->wakeref);
 	__i915_request_queue(rq, NULL);
 
+	__timeline_mark_unlock(engine->kernel_context);
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 7b476cd55dac..eafd94d5e211 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -338,6 +338,8 @@ void intel_timeline_enter(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
+	lockdep_assert_held(&tl->mutex);
+
 	GEM_BUG_ON(!atomic_read(&tl->pin_count));
 	if (tl->active_count++)
 		return;
@@ -352,6 +354,8 @@ void intel_timeline_exit(struct intel_timeline *tl)
 {
 	struct intel_gt_timelines *timelines = &tl->gt->timelines;
 
+	lockdep_assert_held(&tl->mutex);
+
 	GEM_BUG_ON(!tl->active_count);
 	if (--tl->active_count)
 		return;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 174c7fdd02ff..7edcd0fef5c6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1087,7 +1087,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = rcu_dereference_protected(timeline->last_request.request, 1);
+	prev = rcu_dereference_protected(timeline->last_request.request,
+					 lockdep_is_held(&timeline->mutex));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
  2019-08-16  9:24 ` [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
@ 2019-08-16  9:24 ` Chris Wilson
  2019-08-16 12:02   ` Mika Kuoppala
  2019-08-16 11:05 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Patchwork
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-08-16  9:24 UTC (permalink / raw)
  To: intel-gfx

As every i915_active_request should be serialised by a dedicated lock,
i915_active consists of a tree of locks; one for each node. Mark up
the i915_active_request with what lock is supposed to be guarding it so
that we can verify that the serialised updates are indeed serialised.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
 .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gt/intel_context.c       | 11 +++--------
 drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  2 +-
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  7 +++----
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  4 ++++
 .../gpu/drm/i915/gt/selftests/mock_timeline.c |  2 +-
 drivers/gpu/drm/i915/i915_active.c            | 19 +++++++++++++++----
 drivers/gpu/drm/i915/i915_active.h            | 12 ++++++++++--
 drivers/gpu/drm/i915/i915_active_types.h      |  3 +++
 drivers/gpu/drm/i915/i915_vma.c               |  4 ++--
 drivers/gpu/drm/i915/selftests/i915_active.c  |  3 +--
 13 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index e1248eace0e1..eca41c4a5aa6 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
 	if (IS_ERR(rq))
 		return rq;
 
-	err = i915_active_ref(&overlay->last_flip, rq->fence.context, rq);
+	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
 	if (err) {
 		i915_request_add(rq);
 		return ERR_PTR(err);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
index 6a4e84157bf2..818ac6915bc5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
 	 * keep track of the GPU activity within this vma/request, and
 	 * propagate the signal from the request to w->dma.
 	 */
-	err = i915_active_ref(&vma->active, rq->fence.context, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, rq);
 	if (err)
 		goto out_request;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a6b0cb714292..cd1fd2e5423a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -908,7 +908,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (emit)
 			err = emit(rq, data);
 		if (err == 0)
-			err = i915_active_ref(&cb->base, rq->fence.context, rq);
+			err = i915_active_ref(&cb->base, rq->timeline, rq);
 
 		i915_request_add(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 9114953bf920..f55691d151ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -306,10 +306,10 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 
 		/* Queue this switch after current activity by this context. */
 		err = i915_active_request_set(&tl->last_request, rq);
+		mutex_unlock(&tl->mutex);
 		if (err)
-			goto unlock;
+			return err;
 	}
-	lockdep_assert_held(&tl->mutex);
 
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
@@ -319,12 +319,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 	 * words transfer the pinned ce object to tracked active request.
 	 */
 	GEM_BUG_ON(i915_active_is_idle(&ce->active));
-	err = i915_active_ref(&ce->active, rq->fence.context, rq);
-
-unlock:
-	if (rq->timeline != tl)
-		mutex_unlock(&tl->mutex);
-	return err;
+	return i915_active_ref(&ce->active, rq->timeline, rq);
 }
 
 struct i915_request *intel_context_create_request(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index f7a0a660c1c9..8d069efd9457 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -18,7 +18,7 @@ static inline int
 intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
 			      struct i915_request *rq)
 {
-	return i915_active_ref(&node->active, rq->fence.context, rq);
+	return i915_active_ref(&node->active, rq->timeline, rq);
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index eafd94d5e211..02fbe11b671b 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request);
+	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
@@ -440,8 +440,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	 * free it after the current request is retired, which ensures that
 	 * all writes into the cacheline from previous requests are complete.
 	 */
-	err = i915_active_ref(&tl->hwsp_cacheline->active,
-			      tl->fence_context, rq);
+	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
 	if (err)
 		goto err_cacheline;
 
@@ -492,7 +491,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
 static int cacheline_ref(struct intel_timeline_cacheline *cl,
 			 struct i915_request *rq)
 {
-	return i915_active_ref(&cl->active, rq->fence.context, rq);
+	return i915_active_ref(&cl->active, rq->timeline, rq);
 }
 
 int intel_timeline_read_hwsp(struct i915_request *from,
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index d54113697745..321481403165 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -689,7 +689,9 @@ static int live_hwsp_wrap(void *arg)
 
 		tl->seqno = -4u;
 
+		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
+		mutex_unlock(&tl->mutex);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
@@ -704,7 +706,9 @@ static int live_hwsp_wrap(void *arg)
 		}
 		hwsp_seqno[0] = tl->hwsp_seqno;
 
+		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
 		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
+		mutex_unlock(&tl->mutex);
 		if (err) {
 			i915_request_add(rq);
 			goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 5c549205828a..598170efcaf6 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
 
 	mutex_init(&timeline->mutex);
 
-	INIT_ACTIVE_REQUEST(&timeline->last_request);
+	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
 	INIT_LIST_HEAD(&timeline->requests);
 
 	i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 2439c4f62ad8..df6164591702 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -169,10 +169,11 @@ node_retire(struct i915_active_request *base, struct i915_request *rq)
 }
 
 static struct i915_active_request *
-active_instance(struct i915_active *ref, u64 idx)
+active_instance(struct i915_active *ref, struct intel_timeline *tl)
 {
 	struct active_node *node, *prealloc;
 	struct rb_node **p, *parent;
+	u64 idx = tl->fence_context;
 
 	/*
 	 * We track the most recently used timeline to skip a rbtree search
@@ -211,7 +212,7 @@ active_instance(struct i915_active *ref, u64 idx)
 	}
 
 	node = prealloc;
-	i915_active_request_init(&node->base, NULL, node_retire);
+	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
 
@@ -294,18 +295,20 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
 }
 
 int i915_active_ref(struct i915_active *ref,
-		    u64 timeline,
+		    struct intel_timeline *tl,
 		    struct i915_request *rq)
 {
 	struct i915_active_request *active;
 	int err;
 
+	lockdep_assert_held(&tl->mutex);
+
 	/* Prevent reaping in case we malloc/wait while building the tree */
 	err = i915_active_acquire(ref);
 	if (err)
 		return err;
 
-	active = active_instance(ref, timeline);
+	active = active_instance(ref, tl);
 	if (!active) {
 		err = -ENOMEM;
 		goto out;
@@ -596,6 +599,10 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 				goto unwind;
 			}
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+			node->base.lock =
+				&engine->kernel_context->timeline->mutex;
+#endif
 			RCU_INIT_POINTER(node->base.request, NULL);
 			node->base.retire = node_retire;
 			node->timeline = idx;
@@ -701,6 +708,10 @@ int i915_active_request_set(struct i915_active_request *active,
 {
 	int err;
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	lockdep_assert_held(active->lock);
+#endif
+
 	/* Must maintain ordering wrt previous active requests */
 	err = i915_request_await_active_request(rq, active);
 	if (err)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index f6d730cf2fe6..f95058f99057 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -58,15 +58,20 @@ void i915_active_retire_noop(struct i915_active_request *active,
  */
 static inline void
 i915_active_request_init(struct i915_active_request *active,
+			 struct mutex *lock,
 			 struct i915_request *rq,
 			 i915_active_retire_fn retire)
 {
 	RCU_INIT_POINTER(active->request, rq);
 	INIT_LIST_HEAD(&active->link);
 	active->retire = retire ?: i915_active_retire_noop;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	active->lock = lock;
+#endif
 }
 
-#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
+#define INIT_ACTIVE_REQUEST(name, lock) \
+	i915_active_request_init((name), (lock), NULL, NULL)
 
 /**
  * i915_active_request_set - updates the tracker to watch the current request
@@ -81,6 +86,9 @@ static inline void
 __i915_active_request_set(struct i915_active_request *active,
 			  struct i915_request *request)
 {
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	lockdep_assert_held(active->lock);
+#endif
 	list_move(&active->link, &request->active_list);
 	rcu_assign_pointer(active->request, request);
 }
@@ -362,7 +370,7 @@ void __i915_active_init(struct drm_i915_private *i915,
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
-		    u64 timeline,
+		    struct intel_timeline *tl,
 		    struct i915_request *rq);
 
 int i915_active_wait(struct i915_active *ref);
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index ae3ee441c114..d857bd12aa7e 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -24,6 +24,9 @@ struct i915_active_request {
 	struct i915_request __rcu *request;
 	struct list_head link;
 	i915_active_retire_fn retire;
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+	struct mutex *lock;
+#endif
 };
 
 struct active_node;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index a32243951b29..73b02234affe 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -903,14 +903,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	err = i915_active_ref(&vma->active, rq->fence.context, rq);
+	err = i915_active_ref(&vma->active, rq->timeline, rq);
 	if (unlikely(err))
 		return err;
 
 	if (flags & EXEC_OBJECT_WRITE) {
 		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
 			i915_active_ref(&obj->frontbuffer->write,
-					rq->fence.context,
+					rq->timeline,
 					rq);
 
 		dma_resv_add_excl_fence(vma->resv, &rq->fence);
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index e5cd5d47e380..77d844ac8b71 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -110,8 +110,7 @@ __live_active_setup(struct drm_i915_private *i915)
 						       submit,
 						       GFP_KERNEL);
 		if (err >= 0)
-			err = i915_active_ref(&active->base,
-					      rq->fence.context, rq);
+			err = i915_active_ref(&active->base, rq->timeline, rq);
 		i915_request_add(rq);
 		if (err) {
 			pr_err("Failed to track active ref!\n");
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
  2019-08-16  9:24 ` [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
  2019-08-16  9:24 ` [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
@ 2019-08-16 11:05 ` Patchwork
  2019-08-16 11:26 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-08-16 11:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
URL   : https://patchwork.freedesktop.org/series/65294/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
03b058860a18 drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
b0f3ecfca70d drm/i915/gt: Mark context->active_count as protected by timeline->mutex
41bdc71c13f2 drm/i915: Markup expected timeline locks for i915_active
-:290: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#290: FILE: drivers/gpu/drm/i915/i915_active_types.h:28:
+	struct mutex *lock;

total: 0 errors, 0 warnings, 1 checks, 242 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
                   ` (2 preceding siblings ...)
  2019-08-16 11:05 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Patchwork
@ 2019-08-16 11:26 ` Patchwork
  2019-08-16 11:28   ` Chris Wilson
  2019-08-16 11:38 ` [PATCH 1/3] " Mika Kuoppala
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 15+ messages in thread
From: Patchwork @ 2019-08-16 11:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
URL   : https://patchwork.freedesktop.org/series/65294/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6716 -> Patchwork_14048
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14048 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14048, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14048:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_sync@basic-store-each:
    - fi-cfl-8109u:       [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-cfl-8109u/igt@gem_sync@basic-store-each.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-cfl-8109u/igt@gem_sync@basic-store-each.html

  
Known issues
------------

  Here are the changes found in Patchwork_14048 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_mmap_gtt@basic-small-bo-tiledx:
    - fi-icl-u3:          [PASS][3] -> [DMESG-WARN][4] ([fdo#107724]) +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-icl-u3/igt@gem_mmap_gtt@basic-small-bo-tiledx.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-icl-u3/igt@gem_mmap_gtt@basic-small-bo-tiledx.html

  * igt@kms_busy@basic-flip-a:
    - fi-kbl-7567u:       [PASS][5] -> [SKIP][6] ([fdo#109271] / [fdo#109278]) +2 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-kbl-7567u/igt@kms_busy@basic-flip-a.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-kbl-7567u/igt@kms_busy@basic-flip-a.html

  
#### Possible fixes ####

  * igt@gem_exec_reloc@basic-write-gtt:
    - fi-icl-u3:          [DMESG-WARN][7] ([fdo#107724]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-icl-u3/igt@gem_exec_reloc@basic-write-gtt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-icl-u3/igt@gem_exec_reloc@basic-write-gtt.html

  * igt@i915_selftest@live_execlists:
    - fi-skl-gvtdvm:      [DMESG-FAIL][9] ([fdo#111108]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-bwr-2160:        [DMESG-WARN][11] ([fdo#111115]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-bwr-2160/igt@i915_selftest@live_execlists.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-bwr-2160/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_hangcheck:
    - fi-bwr-2160:        [DMESG-FAIL][13] ([fdo#111115]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-bwr-2160/igt@i915_selftest@live_hangcheck.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-bwr-2160/igt@i915_selftest@live_hangcheck.html

  * igt@i915_selftest@live_requests:
    - fi-byt-j1900:       [INCOMPLETE][15] ([fdo#102657]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-byt-j1900/igt@i915_selftest@live_requests.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-byt-j1900/igt@i915_selftest@live_requests.html

  
  [fdo#102657]: https://bugs.freedesktop.org/show_bug.cgi?id=102657
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#111108]: https://bugs.freedesktop.org/show_bug.cgi?id=111108
  [fdo#111115]: https://bugs.freedesktop.org/show_bug.cgi?id=111115


Participating hosts (54 -> 46)
------------------------------

  Additional (1): fi-cfl-8700k 
  Missing    (9): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6716 -> Patchwork_14048

  CI-20190529: 20190529
  CI_DRM_6716: 64ecd8f88d7b55de82ff414784ae4daca93d0577 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5138: b9abe0bf6c478c4f6cac56bff286d6926ad8c0ab @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14048: 41bdc71c13f216b0ba5878af9888c3ee938443f8 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

41bdc71c13f2 drm/i915: Markup expected timeline locks for i915_active
b0f3ecfca70d drm/i915/gt: Mark context->active_count as protected by timeline->mutex
03b058860a18 drm/i915/execlists: Lift process_csb() out of the irq-off spinlock

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-16 11:26 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-08-16 11:28   ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2019-08-16 11:28 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-08-16 12:26:55)
>   * igt@gem_sync@basic-store-each:
>     - fi-cfl-8109u:       [PASS][1] -> [INCOMPLETE][2]
>    [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6716/fi-cfl-8109u/igt@gem_sync@basic-store-each.html
>    [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14048/fi-cfl-8109u/igt@gem_sync@basic-store-each.html

Hmm, you were unconvinced by the new enable_timeslice()
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
                   ` (3 preceding siblings ...)
  2019-08-16 11:26 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-08-16 11:38 ` Mika Kuoppala
  2019-08-16 11:49 ` [PATCH] " Chris Wilson
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 15+ messages in thread
From: Mika Kuoppala @ 2019-08-16 11:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> If we only call process_csb() from the tasklet, though we lose the
> ability to bypass ksoftirqd interrupt processing on direct submission
> paths, we can push it out of the irq-off spinlock.
>
> The penalty is that we then allow schedule_out to be called concurrently
> with schedule_in requiring us to handle the usage count (baked into the
> pointer itself) atomically.
>
> As we do kick the tasklets (via local_bh_enable()) after our submission,
> there is a possibility there to see if we can pull the local softirq
> processing back from the ksoftirqd.
>
> v2: Store the 'switch_priority_hint' on submission, so that we can
> safely check during process_csb().
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  |  10 ++
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 136 +++++++++++-------
>  drivers/gpu/drm/i915/i915_utils.h             |  20 ++-
>  5 files changed, 108 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index a632b20ec4d8..d8ce266c049f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -41,9 +41,7 @@ struct intel_context {
>  	struct intel_engine_cs *engine;
>  	struct intel_engine_cs *inflight;
>  #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
> -#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
> -#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
> -#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
> +#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
>  
>  	struct i915_address_space *vm;
>  	struct i915_gem_context *gem_context;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 957f27a2ec97..ba457c1c7dc0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1459,7 +1459,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  
>  		for (port = execlists->pending; (rq = *port); port++) {
>  			/* Exclude any contexts already counted in active */
> -			if (intel_context_inflight_count(rq->hw_context) == 1)
> +			if (!intel_context_inflight_count(rq->hw_context))
>  				engine->stats.active++;
>  		}
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 9965a32601d6..5441aa9cb863 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -204,6 +204,16 @@ struct intel_engine_execlists {
>  	 */
>  	unsigned int port_mask;
>  
> +	/**
> +	 * @switch_priority_hint: Second context priority.
> +	 *
> +	 * We submit multiple contexts to the HW simultaneously and would
> +	 * like to occasionally switch between them to emulate timeslicing.
> +	 * To know when timeslicing is suitable, we track the priority of
> +	 * the context submitted second.
> +	 */
> +	int switch_priority_hint;
> +
>  	/**
>  	 * @queue_priority_hint: Highest pending priority.
>  	 *
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e9863f4d826b..8cb8c5303f42 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
>  				   status, rq);
>  }
>  
> +static inline struct intel_engine_cs *
> +__execlists_schedule_in(struct i915_request *rq)
> +{
> +	struct intel_engine_cs * const engine = rq->engine;
> +	struct intel_context * const ce = rq->hw_context;
> +
> +	intel_context_get(ce);
> +
> +	intel_gt_pm_get(engine->gt);
> +	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> +	intel_engine_context_in(engine);
> +
> +	return engine;
> +}
> +
>  static inline struct i915_request *
>  execlists_schedule_in(struct i915_request *rq, int idx)
>  {
> -	struct intel_context *ce = rq->hw_context;
> -	int count;
> +	struct intel_context * const ce = rq->hw_context;
> +	struct intel_engine_cs *old;
>  
> +	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
>  	trace_i915_request_in(rq, idx);
>  
> -	count = intel_context_inflight_count(ce);
> -	if (!count) {
> -		intel_context_get(ce);
> -		ce->inflight = rq->engine;
> -
> -		intel_gt_pm_get(ce->inflight->gt);
> -		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> -		intel_engine_context_in(ce->inflight);
> -	}
> +	old = READ_ONCE(ce->inflight);
> +	do {
> +		if (!old) {

From the other thread I got the explanation for this. Perhaps a comment
about sole ownership would be warranted, but I don't insist:
if you don't have old, how could you have ended up scheduling out?

> +			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
> +			break;
> +		}
> +	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
>  
> -	intel_context_inflight_inc(ce);
>  	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
> -
>  	return i915_request_get(rq);
>  }
>  
> @@ -581,35 +593,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
>  }
>  
>  static inline void
> -execlists_schedule_out(struct i915_request *rq)
> +__execlists_schedule_out(struct i915_request *rq,
> +			 struct intel_engine_cs * const engine)
>  {
> -	struct intel_context *ce = rq->hw_context;
> +	struct intel_context * const ce = rq->hw_context;
>  
> -	GEM_BUG_ON(!intel_context_inflight_count(ce));
> +	intel_engine_context_out(engine);
> +	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> +	intel_gt_pm_put(engine->gt);
>  
> -	trace_i915_request_out(rq);
> +	/*
> +	 * If this is part of a virtual engine, its next request may
> +	 * have been blocked waiting for access to the active context.
> +	 * We have to kick all the siblings again in case we need to
> +	 * switch (e.g. the next request is not runnable on this
> +	 * engine). Hopefully, we will already have submitted the next
> +	 * request before the tasklet runs and do not need to rebuild
> +	 * each virtual tree and kick everyone again.
> +	 */
> +	if (ce->engine != engine)
> +		kick_siblings(rq, ce);
>  
> -	intel_context_inflight_dec(ce);
> -	if (!intel_context_inflight_count(ce)) {
> -		intel_engine_context_out(ce->inflight);
> -		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> -		intel_gt_pm_put(ce->inflight->gt);
> +	intel_context_put(ce);
> +}
>  
> -		/*
> -		 * If this is part of a virtual engine, its next request may
> -		 * have been blocked waiting for access to the active context.
> -		 * We have to kick all the siblings again in case we need to
> -		 * switch (e.g. the next request is not runnable on this
> -		 * engine). Hopefully, we will already have submitted the next
> -		 * request before the tasklet runs and do not need to rebuild
> -		 * each virtual tree and kick everyone again.
> -		 */
> -		ce->inflight = NULL;
> -		if (rq->engine != ce->engine)
> -			kick_siblings(rq, ce);
> +static inline void
> +execlists_schedule_out(struct i915_request *rq)
> +{
> +	struct intel_context * const ce = rq->hw_context;
> +	struct intel_engine_cs *cur, *old;
>  
> -		intel_context_put(ce);
> -	}
> +	trace_i915_request_out(rq);
> +	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
> +
> +	old = READ_ONCE(ce->inflight);
> +	do
> +		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
> +	while (!try_cmpxchg(&ce->inflight, &old, cur));
> +	if (!cur)
> +		__execlists_schedule_out(rq, old);
>  
>  	i915_request_put(rq);
>  }
> @@ -684,6 +706,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
>  
>  	trace_ports(execlists, msg, execlists->pending);
>  
> +	if (!execlists->pending[0])

Would it be prudent to put READ_ONCE here also? Even if
you could contemplate that you won't ever see the
assert needed in other places.

Now it seems to be contained, though,

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>


> +		return false;
> +
>  	if (execlists->pending[execlists_num_ports(execlists)])
>  		return false;
>  
> @@ -942,11 +967,23 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
>  }
>  
>  static bool
> -enable_timeslice(struct intel_engine_cs *engine)
> +switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
> +{
> +	if (list_is_last(&rq->sched.link, &engine->active.requests))
> +		return INT_MIN;
> +
> +	return rq_prio(list_next_entry(rq, sched.link));
> +}
> +
> +static bool
> +enable_timeslice(const struct intel_engine_execlists *execlists)
>  {
> -	struct i915_request *last = last_active(&engine->execlists);
> +	const struct i915_request *rq = *execlists->active;
>  
> -	return last && need_timeslice(engine, last);
> +	if (i915_request_completed(rq))
> +		return false;
> +
> +	return execlists->switch_priority_hint >= effective_prio(rq);
>  }
>  
>  static void record_preemption(struct intel_engine_execlists *execlists)
> @@ -1292,6 +1329,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		*port = execlists_schedule_in(last, port - execlists->pending);
>  		memset(port + 1, 0, (last_port - port) * sizeof(*port));
>  		execlists_submit_ports(engine);
> +		execlists->switch_priority_hint =
> +			switch_prio(engine, *execlists->pending);
>  	} else {
>  		ring_set_paused(engine, 0);
>  	}
> @@ -1356,7 +1395,6 @@ static void process_csb(struct intel_engine_cs *engine)
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> -	lockdep_assert_held(&engine->active.lock);
>  	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
>  
>  	/*
> @@ -1427,15 +1465,14 @@ static void process_csb(struct intel_engine_cs *engine)
>  				       execlists->pending,
>  				       execlists_num_ports(execlists) *
>  				       sizeof(*execlists->pending));
> -			execlists->pending[0] = NULL;
>  
> -			trace_ports(execlists, "promoted", execlists->active);
> -
> -			if (enable_timeslice(engine))
> +			if (enable_timeslice(execlists))
>  				mod_timer(&execlists->timer, jiffies + 1);
>  
>  			if (!inject_preempt_hang(execlists))
>  				ring_set_paused(engine, 0);
> +
> +			WRITE_ONCE(execlists->pending[0], NULL);
>  			break;
>  
>  		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
> @@ -1479,8 +1516,6 @@ static void process_csb(struct intel_engine_cs *engine)
>  static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>  {
>  	lockdep_assert_held(&engine->active.lock);
> -
> -	process_csb(engine);
>  	if (!engine->execlists.pending[0])
>  		execlists_dequeue(engine);
>  }
> @@ -1494,9 +1529,12 @@ static void execlists_submission_tasklet(unsigned long data)
>  	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&engine->active.lock, flags);
> -	__execlists_submission_tasklet(engine);
> -	spin_unlock_irqrestore(&engine->active.lock, flags);
> +	process_csb(engine);
> +	if (!READ_ONCE(engine->execlists.pending[0])) {
> +		spin_lock_irqsave(&engine->active.lock, flags);
> +		__execlists_submission_tasklet(engine);
> +		spin_unlock_irqrestore(&engine->active.lock, flags);
> +	}
>  }
>  
>  static void execlists_submission_timer(struct timer_list *timer)
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index d652ba5d2320..562f756da421 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
>  	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
>  })
>  
> -#define ptr_count_dec(p_ptr) do {					\
> -	typeof(p_ptr) __p = (p_ptr);					\
> -	unsigned long __v = (unsigned long)(*__p);			\
> -	*__p = (typeof(*p_ptr))(--__v);					\
> -} while (0)
> -
> -#define ptr_count_inc(p_ptr) do {					\
> -	typeof(p_ptr) __p = (p_ptr);					\
> -	unsigned long __v = (unsigned long)(*__p);			\
> -	*__p = (typeof(*p_ptr))(++__v);					\
> -} while (0)
> +#define ptr_dec(ptr) ({							\
> +	unsigned long __v = (unsigned long)(ptr);			\
> +	(typeof(ptr))(__v - 1);						\
> +})
> +
> +#define ptr_inc(ptr) ({							\
> +	unsigned long __v = (unsigned long)(ptr);			\
> +	(typeof(ptr))(__v + 1);						\
> +})
>  
>  #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
>  #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
> -- 
> 2.23.0.rc1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex
  2019-08-16  9:24 ` [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
@ 2019-08-16 11:48   ` Mika Kuoppala
  0 siblings, 0 replies; 15+ messages in thread
From: Mika Kuoppala @ 2019-08-16 11:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We use timeline->mutex to protect modifications to
> context->active_count, and the associated enable/disable callbacks.
> Due to complications with engine-pm barrier there is a path where we used
> a "superlock" to provide serialised protect and so could not
> unconditionally assert with lockdep that it was always held. However,
> we can mark the mutex as taken (noting that we may be nested underneath
> ourselves) which means we can be reassured the right timeline->mutex is
> always treated as held and let lockdep roam free.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_context.h       |  3 +++
>  drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
>  drivers/gpu/drm/i915/gt/intel_engine_pm.c     | 14 ++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_timeline.c      |  4 ++++
>  drivers/gpu/drm/i915/i915_request.c           |  3 ++-
>  5 files changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 053a1307ecb4..dd742ac2fbdb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -89,17 +89,20 @@ void intel_context_exit_engine(struct intel_context *ce);
>  
>  static inline void intel_context_enter(struct intel_context *ce)
>  {
> +	lockdep_assert_held(&ce->timeline->mutex);
>  	if (!ce->active_count++)
>  		ce->ops->enter(ce);
>  }
>  
>  static inline void intel_context_mark_active(struct intel_context *ce)
>  {
> +	lockdep_assert_held(&ce->timeline->mutex);
>  	++ce->active_count;
>  }
>  
>  static inline void intel_context_exit(struct intel_context *ce)
>  {
> +	lockdep_assert_held(&ce->timeline->mutex);
>  	GEM_BUG_ON(!ce->active_count);
>  	if (!--ce->active_count)
>  		ce->ops->exit(ce);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index d8ce266c049f..bf9cedfccbf0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -59,7 +59,7 @@ struct intel_context {
>  	u32 *lrc_reg_state;
>  	u64 lrc_desc;
>  
> -	unsigned int active_count; /* notionally protected by timeline->mutex */
> +	unsigned int active_count; /* protected by timeline->mutex */
>  
>  	atomic_t pin_count;
>  	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index f3f0109f9e22..5f03f7dcad72 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -37,6 +37,16 @@ static int __engine_unpark(struct intel_wakeref *wf)
>  	return 0;
>  }
>  
> +static inline void __timeline_mark_lock(struct intel_context *ce)
> +{
> +	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
> +}
> +
> +static inline void __timeline_mark_unlock(struct intel_context *ce)
> +{
> +	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
> +}
> +
>  static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>  {
>  	struct i915_request *rq;
> @@ -61,6 +71,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>  	 * retiring the last request, thus all rings should be empty and
>  	 * all timelines idle.
>  	 */
> +	__timeline_mark_lock(engine->kernel_context);
> +
>  	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
>  	if (IS_ERR(rq))
>  		/* Context switch failed, hope for the best! Maybe reset? */
> @@ -80,6 +92,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>  	__intel_wakeref_defer_park(&engine->wakeref);
>  	__i915_request_queue(rq, NULL);
>  
> +	__timeline_mark_unlock(engine->kernel_context);
> +
>  	return false;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 7b476cd55dac..eafd94d5e211 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -338,6 +338,8 @@ void intel_timeline_enter(struct intel_timeline *tl)
>  {
>  	struct intel_gt_timelines *timelines = &tl->gt->timelines;
>  
> +	lockdep_assert_held(&tl->mutex);
> +
>  	GEM_BUG_ON(!atomic_read(&tl->pin_count));
>  	if (tl->active_count++)
>  		return;
> @@ -352,6 +354,8 @@ void intel_timeline_exit(struct intel_timeline *tl)
>  {
>  	struct intel_gt_timelines *timelines = &tl->gt->timelines;
>  
> +	lockdep_assert_held(&tl->mutex);
> +
>  	GEM_BUG_ON(!tl->active_count);
>  	if (--tl->active_count)
>  		return;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 174c7fdd02ff..7edcd0fef5c6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1087,7 +1087,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
>  	 * precludes optimising to use semaphores serialisation of a single
>  	 * timeline across engines.
>  	 */
> -	prev = rcu_dereference_protected(timeline->last_request.request, 1);
> +	prev = rcu_dereference_protected(timeline->last_request.request,
> +					 lockdep_is_held(&timeline->mutex));
>  	if (prev && !i915_request_completed(prev)) {
>  		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
>  			i915_sw_fence_await_sw_fence(&rq->submit,
> -- 
> 2.23.0.rc1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
                   ` (4 preceding siblings ...)
  2019-08-16 11:38 ` [PATCH 1/3] " Mika Kuoppala
@ 2019-08-16 11:49 ` Chris Wilson
  2019-08-16 16:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2) Patchwork
  2019-08-16 16:37 ` ✗ Fi.CI.BAT: failure " Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2019-08-16 11:49 UTC (permalink / raw)
  To: intel-gfx

If we only call process_csb() from the tasklet, though we lose the
ability to bypass ksoftirqd interrupt processing on direct submission
paths, we can push it out of the irq-off spinlock.

The penalty is that we then allow schedule_out to be called concurrently
with schedule_in requiring us to handle the usage count (baked into the
pointer itself) atomically.

As we do kick the tasklets (via local_bh_enable()) after our submission,
there is a possibility there to see if we can pull the local softirq
processing back from the ksoftirqd.

v2: Store the 'switch_priority_hint' on submission, so that we can
safely check during process_csb().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  10 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 136 +++++++++++-------
 drivers/gpu/drm/i915/i915_utils.h             |  20 ++-
 5 files changed, 108 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a632b20ec4d8..d8ce266c049f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -41,9 +41,7 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
-#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
-#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
+#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context *gem_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 957f27a2ec97..ba457c1c7dc0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1459,7 +1459,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (intel_context_inflight_count(rq->hw_context) == 1)
+			if (!intel_context_inflight_count(rq->hw_context))
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9965a32601d6..5441aa9cb863 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -204,6 +204,16 @@ struct intel_engine_execlists {
 	 */
 	unsigned int port_mask;
 
+	/**
+	 * @switch_priority_hint: Second context priority.
+	 *
+	 * We submit multiple contexts to the HW simultaneously and would
+	 * like to occasionally switch between them to emulate timeslicing.
+	 * To know when timeslicing is suitable, we track the priority of
+	 * the context submitted second.
+	 */
+	int switch_priority_hint;
+
 	/**
 	 * @queue_priority_hint: Highest pending priority.
 	 *
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e9863f4d826b..2978cf16fb9b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
+
+	intel_context_get(ce);
+
+	intel_gt_pm_get(engine->gt);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(engine);
+
+	return engine;
+}
+
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context *ce = rq->hw_context;
-	int count;
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *old;
 
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 	trace_i915_request_in(rq, idx);
 
-	count = intel_context_inflight_count(ce);
-	if (!count) {
-		intel_context_get(ce);
-		ce->inflight = rq->engine;
-
-		intel_gt_pm_get(ce->inflight->gt);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-		intel_engine_context_in(ce->inflight);
-	}
+	old = READ_ONCE(ce->inflight);
+	do {
+		if (!old) {
+			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			break;
+		}
+	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 
-	intel_context_inflight_inc(ce);
 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
 	return i915_request_get(rq);
 }
 
@@ -581,35 +593,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 }
 
 static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq,
+			 struct intel_engine_cs * const engine)
 {
-	struct intel_context *ce = rq->hw_context;
+	struct intel_context * const ce = rq->hw_context;
 
-	GEM_BUG_ON(!intel_context_inflight_count(ce));
+	intel_engine_context_out(engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	intel_gt_pm_put(engine->gt);
 
-	trace_i915_request_out(rq);
+	/*
+	 * If this is part of a virtual engine, its next request may
+	 * have been blocked waiting for access to the active context.
+	 * We have to kick all the siblings again in case we need to
+	 * switch (e.g. the next request is not runnable on this
+	 * engine). Hopefully, we will already have submitted the next
+	 * request before the tasklet runs and do not need to rebuild
+	 * each virtual tree and kick everyone again.
+	 */
+	if (ce->engine != engine)
+		kick_siblings(rq, ce);
 
-	intel_context_inflight_dec(ce);
-	if (!intel_context_inflight_count(ce)) {
-		intel_engine_context_out(ce->inflight);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-		intel_gt_pm_put(ce->inflight->gt);
+	intel_context_put(ce);
+}
 
-		/*
-		 * If this is part of a virtual engine, its next request may
-		 * have been blocked waiting for access to the active context.
-		 * We have to kick all the siblings again in case we need to
-		 * switch (e.g. the next request is not runnable on this
-		 * engine). Hopefully, we will already have submitted the next
-		 * request before the tasklet runs and do not need to rebuild
-		 * each virtual tree and kick everyone again.
-		 */
-		ce->inflight = NULL;
-		if (rq->engine != ce->engine)
-			kick_siblings(rq, ce);
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *cur, *old;
 
-		intel_context_put(ce);
-	}
+	trace_i915_request_out(rq);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+	old = READ_ONCE(ce->inflight);
+	do
+		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+	while (!try_cmpxchg(&ce->inflight, &old, cur));
+	if (!cur)
+		__execlists_schedule_out(rq, old);
 
 	i915_request_put(rq);
 }
@@ -684,6 +706,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
+	if (!execlists->pending[0])
+		return false;
+
 	if (execlists->pending[execlists_num_ports(execlists)])
 		return false;
 
@@ -941,12 +966,24 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return hint >= effective_prio(rq);
 }
 
+static int
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+	if (list_is_last(&rq->sched.link, &engine->active.requests))
+		return INT_MIN;
+
+	return rq_prio(list_next_entry(rq, sched.link));
+}
+
 static bool
-enable_timeslice(struct intel_engine_cs *engine)
+enable_timeslice(const struct intel_engine_execlists *execlists)
 {
-	struct i915_request *last = last_active(&engine->execlists);
+	const struct i915_request *rq = *execlists->active;
 
-	return last && need_timeslice(engine, last);
+	if (i915_request_completed(rq))
+		return false;
+
+	return execlists->switch_priority_hint >= effective_prio(rq);
 }
 
 static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1292,6 +1329,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		*port = execlists_schedule_in(last, port - execlists->pending);
 		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
+		execlists->switch_priority_hint =
+			switch_prio(engine, *execlists->pending);
 	} else {
 		ring_set_paused(engine, 0);
 	}
@@ -1356,7 +1395,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->active.lock);
 	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
@@ -1427,15 +1465,14 @@ static void process_csb(struct intel_engine_cs *engine)
 				       execlists->pending,
 				       execlists_num_ports(execlists) *
 				       sizeof(*execlists->pending));
-			execlists->pending[0] = NULL;
 
-			trace_ports(execlists, "promoted", execlists->active);
-
-			if (enable_timeslice(engine))
+			if (enable_timeslice(execlists))
 				mod_timer(&execlists->timer, jiffies + 1);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
+
+			WRITE_ONCE(execlists->pending[0], NULL);
 			break;
 
 		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1479,8 +1516,6 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-
-	process_csb(engine);
 	if (!engine->execlists.pending[0])
 		execlists_dequeue(engine);
 }
@@ -1494,9 +1529,12 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->active.lock, flags);
-	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->active.lock, flags);
+	process_csb(engine);
+	if (!READ_ONCE(engine->execlists.pending[0])) {
+		spin_lock_irqsave(&engine->active.lock, flags);
+		__execlists_submission_tasklet(engine);
+		spin_unlock_irqrestore(&engine->active.lock, flags);
+	}
 }
 
 static void execlists_submission_timer(struct timer_list *timer)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d652ba5d2320..562f756da421 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
-#define ptr_count_dec(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(--__v);					\
-} while (0)
-
-#define ptr_count_inc(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(++__v);					\
-} while (0)
+#define ptr_dec(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v - 1);						\
+})
+
+#define ptr_inc(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v + 1);						\
+})
 
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active
  2019-08-16  9:24 ` [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
@ 2019-08-16 12:02   ` Mika Kuoppala
  2019-08-16 12:06     ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Mika Kuoppala @ 2019-08-16 12:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> As every i915_active_request should be serialised by a dedicated lock,
> i915_active consists of a tree of locks; one for each node. Markup up
> the i915_active_request with what lock is supposed to be guarding it so
> that we can verify that the serialised updated are indeed serialised.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/display/intel_overlay.c  |  2 +-
>  .../gpu/drm/i915/gem/i915_gem_client_blt.c    |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
>  drivers/gpu/drm/i915/gt/intel_context.c       | 11 +++--------
>  drivers/gpu/drm/i915/gt/intel_engine_pool.h   |  2 +-
>  drivers/gpu/drm/i915/gt/intel_timeline.c      |  7 +++----
>  drivers/gpu/drm/i915/gt/selftest_timeline.c   |  4 ++++
>  .../gpu/drm/i915/gt/selftests/mock_timeline.c |  2 +-
>  drivers/gpu/drm/i915/i915_active.c            | 19 +++++++++++++++----
>  drivers/gpu/drm/i915/i915_active.h            | 12 ++++++++++--
>  drivers/gpu/drm/i915/i915_active_types.h      |  3 +++
>  drivers/gpu/drm/i915/i915_vma.c               |  4 ++--
>  drivers/gpu/drm/i915/selftests/i915_active.c  |  3 +--
>  13 files changed, 46 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
> index e1248eace0e1..eca41c4a5aa6 100644
> --- a/drivers/gpu/drm/i915/display/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/display/intel_overlay.c
> @@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
>  	if (IS_ERR(rq))
>  		return rq;
>  
> -	err = i915_active_ref(&overlay->last_flip, rq->fence.context, rq);
> +	err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
>  	if (err) {
>  		i915_request_add(rq);
>  		return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> index 6a4e84157bf2..818ac6915bc5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
> @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
>  	 * keep track of the GPU activity within this vma/request, and
>  	 * propagate the signal from the request to w->dma.
>  	 */
> -	err = i915_active_ref(&vma->active, rq->fence.context, rq);
> +	err = i915_active_ref(&vma->active, rq->timeline, rq);
>  	if (err)
>  		goto out_request;
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index a6b0cb714292..cd1fd2e5423a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -908,7 +908,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>  		if (emit)
>  			err = emit(rq, data);
>  		if (err == 0)
> -			err = i915_active_ref(&cb->base, rq->fence.context, rq);
> +			err = i915_active_ref(&cb->base, rq->timeline, rq);
>  
>  		i915_request_add(rq);
>  		if (err)
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 9114953bf920..f55691d151ae 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -306,10 +306,10 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>  
>  		/* Queue this switch after current activity by this context. */
>  		err = i915_active_request_set(&tl->last_request, rq);
> +		mutex_unlock(&tl->mutex);
>  		if (err)
> -			goto unlock;
> +			return err;
>  	}
> -	lockdep_assert_held(&tl->mutex);
>  
>  	/*
>  	 * Guarantee context image and the timeline remains pinned until the
> @@ -319,12 +319,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
>  	 * words transfer the pinned ce object to tracked active request.
>  	 */
>  	GEM_BUG_ON(i915_active_is_idle(&ce->active));
> -	err = i915_active_ref(&ce->active, rq->fence.context, rq);
> -
> -unlock:
> -	if (rq->timeline != tl)
> -		mutex_unlock(&tl->mutex);
> -	return err;
> +	return i915_active_ref(&ce->active, rq->timeline, rq);

There seems to have been so much work here for no apparent gain.
Good riddance.

>  }
>  
>  struct i915_request *intel_context_create_request(struct intel_context *ce)
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> index f7a0a660c1c9..8d069efd9457 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
> @@ -18,7 +18,7 @@ static inline int
>  intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
>  			      struct i915_request *rq)
>  {
> -	return i915_active_ref(&node->active, rq->fence.context, rq);
> +	return i915_active_ref(&node->active, rq->timeline, rq);
>  }
>  
>  static inline void
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index eafd94d5e211..02fbe11b671b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
>  
>  	mutex_init(&timeline->mutex);
>  
> -	INIT_ACTIVE_REQUEST(&timeline->last_request);
> +	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
>  	INIT_LIST_HEAD(&timeline->requests);
>  
>  	i915_syncmap_init(&timeline->sync);
> @@ -440,8 +440,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
>  	 * free it after the current request is retired, which ensures that
>  	 * all writes into the cacheline from previous requests are complete.
>  	 */
> -	err = i915_active_ref(&tl->hwsp_cacheline->active,
> -			      tl->fence_context, rq);
> +	err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
>  	if (err)
>  		goto err_cacheline;
>  
> @@ -492,7 +491,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
>  static int cacheline_ref(struct intel_timeline_cacheline *cl,
>  			 struct i915_request *rq)
>  {
> -	return i915_active_ref(&cl->active, rq->fence.context, rq);
> +	return i915_active_ref(&cl->active, rq->timeline, rq);
>  }
>  
>  int intel_timeline_read_hwsp(struct i915_request *from,
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index d54113697745..321481403165 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -689,7 +689,9 @@ static int live_hwsp_wrap(void *arg)
>  
>  		tl->seqno = -4u;
>  
> +		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
>  		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
> +		mutex_unlock(&tl->mutex);
>  		if (err) {
>  			i915_request_add(rq);
>  			goto out;
> @@ -704,7 +706,9 @@ static int live_hwsp_wrap(void *arg)
>  		}
>  		hwsp_seqno[0] = tl->hwsp_seqno;
>  
> +		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
>  		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
> +		mutex_unlock(&tl->mutex);
>  		if (err) {
>  			i915_request_add(rq);
>  			goto out;
> diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> index 5c549205828a..598170efcaf6 100644
> --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
> @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
>  
>  	mutex_init(&timeline->mutex);
>  
> -	INIT_ACTIVE_REQUEST(&timeline->last_request);
> +	INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
>  	INIT_LIST_HEAD(&timeline->requests);
>  
>  	i915_syncmap_init(&timeline->sync);
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index 2439c4f62ad8..df6164591702 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -169,10 +169,11 @@ node_retire(struct i915_active_request *base, struct i915_request *rq)
>  }
>  
>  static struct i915_active_request *
> -active_instance(struct i915_active *ref, u64 idx)
> +active_instance(struct i915_active *ref, struct intel_timeline *tl)
>  {
>  	struct active_node *node, *prealloc;
>  	struct rb_node **p, *parent;
> +	u64 idx = tl->fence_context;
>  
>  	/*
>  	 * We track the most recently used timeline to skip a rbtree search
> @@ -211,7 +212,7 @@ active_instance(struct i915_active *ref, u64 idx)
>  	}
>  
>  	node = prealloc;
> -	i915_active_request_init(&node->base, NULL, node_retire);
> +	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
>  	node->ref = ref;
>  	node->timeline = idx;
>  
> @@ -294,18 +295,20 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
>  }
>  
>  int i915_active_ref(struct i915_active *ref,
> -		    u64 timeline,
> +		    struct intel_timeline *tl,
>  		    struct i915_request *rq)
>  {
>  	struct i915_active_request *active;
>  	int err;
>  
> +	lockdep_assert_held(&tl->mutex);
> +
>  	/* Prevent reaping in case we malloc/wait while building the tree */
>  	err = i915_active_acquire(ref);
>  	if (err)
>  		return err;
>  
> -	active = active_instance(ref, timeline);
> +	active = active_instance(ref, tl);
>  	if (!active) {
>  		err = -ENOMEM;
>  		goto out;
> @@ -596,6 +599,10 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>  				goto unwind;
>  			}
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +			node->base.lock =
> +				&engine->kernel_context->timeline->mutex;
> +#endif
>  			RCU_INIT_POINTER(node->base.request, NULL);
>  			node->base.retire = node_retire;
>  			node->timeline = idx;
> @@ -701,6 +708,10 @@ int i915_active_request_set(struct i915_active_request *active,
>  {
>  	int err;
>  
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +	lockdep_assert_held(active->lock);
> +#endif
> +
>  	/* Must maintain ordering wrt previous active requests */
>  	err = i915_request_await_active_request(rq, active);
>  	if (err)
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index f6d730cf2fe6..f95058f99057 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -58,15 +58,20 @@ void i915_active_retire_noop(struct i915_active_request *active,
>   */
>  static inline void
>  i915_active_request_init(struct i915_active_request *active,
> +			 struct mutex *lock,
>  			 struct i915_request *rq,
>  			 i915_active_retire_fn retire)
>  {
>  	RCU_INIT_POINTER(active->request, rq);
>  	INIT_LIST_HEAD(&active->link);
>  	active->retire = retire ?: i915_active_retire_noop;
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +	active->lock = lock;
> +#endif
>  }
>  
> -#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
> +#define INIT_ACTIVE_REQUEST(name, lock) \
> +	i915_active_request_init((name), (lock), NULL, NULL)
>  
>  /**
>   * i915_active_request_set - updates the tracker to watch the current request
> @@ -81,6 +86,9 @@ static inline void
>  __i915_active_request_set(struct i915_active_request *active,
>  			  struct i915_request *request)
>  {
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +	lockdep_assert_held(active->lock);
> +#endif
>  	list_move(&active->link, &request->active_list);
>  	rcu_assign_pointer(active->request, request);
>  }
> @@ -362,7 +370,7 @@ void __i915_active_init(struct drm_i915_private *i915,
>  } while (0)
>  
>  int i915_active_ref(struct i915_active *ref,
> -		    u64 timeline,
> +		    struct intel_timeline *tl,
>  		    struct i915_request *rq);
>  
>  int i915_active_wait(struct i915_active *ref);
> diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> index ae3ee441c114..d857bd12aa7e 100644
> --- a/drivers/gpu/drm/i915/i915_active_types.h
> +++ b/drivers/gpu/drm/i915/i915_active_types.h
> @@ -24,6 +24,9 @@ struct i915_active_request {
>  	struct i915_request __rcu *request;
>  	struct list_head link;
>  	i915_active_retire_fn retire;
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +	struct mutex *lock;

Checkpatch thinks the usage should somehow be stated with a comment.

So put something like
/* Incorporeal. Ref piggybacking timeline for lockdep */

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +#endif
>  };
>  
>  struct active_node;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index a32243951b29..73b02234affe 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -903,14 +903,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
>  	 * add the active reference first and queue for it to be dropped
>  	 * *last*.
>  	 */
> -	err = i915_active_ref(&vma->active, rq->fence.context, rq);
> +	err = i915_active_ref(&vma->active, rq->timeline, rq);
>  	if (unlikely(err))
>  		return err;
>  
>  	if (flags & EXEC_OBJECT_WRITE) {
>  		if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
>  			i915_active_ref(&obj->frontbuffer->write,
> -					rq->fence.context,
> +					rq->timeline,
>  					rq);
>  
>  		dma_resv_add_excl_fence(vma->resv, &rq->fence);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
> index e5cd5d47e380..77d844ac8b71 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_active.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_active.c
> @@ -110,8 +110,7 @@ __live_active_setup(struct drm_i915_private *i915)
>  						       submit,
>  						       GFP_KERNEL);
>  		if (err >= 0)
> -			err = i915_active_ref(&active->base,
> -					      rq->fence.context, rq);
> +			err = i915_active_ref(&active->base, rq->timeline, rq);
>  		i915_request_add(rq);
>  		if (err) {
>  			pr_err("Failed to track active ref!\n");
> -- 
> 2.23.0.rc1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active
  2019-08-16 12:02   ` Mika Kuoppala
@ 2019-08-16 12:06     ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2019-08-16 12:06 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-08-16 13:02:21)
> > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
> > index ae3ee441c114..d857bd12aa7e 100644
> > --- a/drivers/gpu/drm/i915/i915_active_types.h
> > +++ b/drivers/gpu/drm/i915/i915_active_types.h
> > @@ -24,6 +24,9 @@ struct i915_active_request {
> >       struct i915_request __rcu *request;
> >       struct list_head link;
> >       i915_active_retire_fn retire;
> > +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> > +     struct mutex *lock;
> 
> Checkpatch thinks the usage should be somehow stated with comment.
> 
> So put something like
> /* Incorporeal. Ref piggypacking timeline for lockdep */

I'm using incorporeal. :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2)
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
                   ` (5 preceding siblings ...)
  2019-08-16 11:49 ` [PATCH] " Chris Wilson
@ 2019-08-16 16:16 ` Patchwork
  2019-08-16 16:37 ` ✗ Fi.CI.BAT: failure " Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-08-16 16:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2)
URL   : https://patchwork.freedesktop.org/series/65294/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
62875a3f6df9 drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
783265de90c7 drm/i915/gt: Mark context->active_count as protected by timeline->mutex
f078e08d7958 drm/i915: Markup expected timeline locks for i915_active
-:291: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#291: FILE: drivers/gpu/drm/i915/i915_active_types.h:28:
+	struct mutex *lock;

total: 0 errors, 0 warnings, 1 checks, 242 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2)
  2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
                   ` (6 preceding siblings ...)
  2019-08-16 16:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2) Patchwork
@ 2019-08-16 16:37 ` Patchwork
  7 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2019-08-16 16:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2)
URL   : https://patchwork.freedesktop.org/series/65294/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6719 -> Patchwork_14055
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_14055 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_14055, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_14055:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_blt:
    - fi-bdw-gvtdvm:      [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-bdw-gvtdvm/igt@i915_selftest@live_blt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-bdw-gvtdvm/igt@i915_selftest@live_blt.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence:
    - fi-apl-guc:         [PASS][3] -> [DMESG-WARN][4] +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-apl-guc/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-apl-guc/igt@kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence.html

  
Known issues
------------

  Here are the changes found in Patchwork_14055 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][5] -> [FAIL][6] ([fdo#108511])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-apl-guc:         [PASS][7] -> [DMESG-WARN][8] ([fdo#103558])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-apl-guc/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-apl-guc/igt@kms_pipe_crc_basic@read-crc-pipe-b.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-c:
    - fi-apl-guc:         [PASS][9] -> [DMESG-WARN][10] ([fdo#103558] / [fdo#105602])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-apl-guc/igt@kms_pipe_crc_basic@read-crc-pipe-c.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-apl-guc/igt@kms_pipe_crc_basic@read-crc-pipe-c.html

  
#### Possible fixes ####

  * igt@gem_ctx_exec@basic:
    - fi-icl-u3:          [DMESG-WARN][11] ([fdo#107724]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-icl-u3/igt@gem_ctx_exec@basic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-icl-u3/igt@gem_ctx_exec@basic.html

  * igt@kms_frontbuffer_tracking@basic:
    - {fi-icl-guc}:       [FAIL][13] ([fdo#103167]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6719/fi-icl-guc/igt@kms_frontbuffer_tracking@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/fi-icl-guc/igt@kms_frontbuffer_tracking@basic.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511


Participating hosts (52 -> 46)
------------------------------

  Additional (1): fi-bxt-dsi 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6719 -> Patchwork_14055

  CI-20190529: 20190529
  CI_DRM_6719: c84d7192c40db5b0d1c1164abeeefa5efe17c5cd @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5138: b9abe0bf6c478c4f6cac56bff286d6926ad8c0ab @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_14055: f078e08d7958f65b1b2fd74b56a70ce1187a4426 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

f078e08d7958 drm/i915: Markup expected timeline locks for i915_active
783265de90c7 drm/i915/gt: Mark context->active_count as protected by timeline->mutex
62875a3f6df9 drm/i915/execlists: Lift process_csb() out of the irq-off spinlock

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_14055/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-12 11:13 ` [PATCH] " Chris Wilson
@ 2019-08-12 15:29   ` kbuild test robot
  0 siblings, 0 replies; 15+ messages in thread
From: kbuild test robot @ 2019-08-12 15:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5031 bytes --]

Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[cannot apply to v5.3-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-execlists-Lift-process_csb-out-of-the-irq-off-spinlock/20190812-211057
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-defconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-10) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c: In function 'schedule_in':
>> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:541:2: error: implicit declaration of function 'intel_context_inflight_inc'; did you mean 'intel_context_inflight_count'? [-Werror=implicit-function-declaration]
     intel_context_inflight_inc(rq->hw_context);
     ^~~~~~~~~~~~~~~~~~~~~~~~~~
     intel_context_inflight_count
   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c: In function 'schedule_out':
>> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:551:2: error: implicit declaration of function 'intel_context_inflight_dec'; did you mean 'intel_context_inflight'? [-Werror=implicit-function-declaration]
     intel_context_inflight_dec(rq->hw_context);
     ^~~~~~~~~~~~~~~~~~~~~~~~~~
     intel_context_inflight
   cc1: some warnings being treated as errors

vim +541 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

77f0d0e925e8a0 drivers/gpu/drm/i915/i915_guc_submission.c        Chris Wilson 2017-05-17  534  
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  535  static struct i915_request *schedule_in(struct i915_request *rq, int idx)
2a694feb93556e drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2018-04-03  536  {
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  537  	trace_i915_request_in(rq, idx);
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  538  
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  539  	if (!rq->hw_context->inflight)
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  540  		rq->hw_context->inflight = rq->engine;
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20 @541  	intel_context_inflight_inc(rq->hw_context);
c7302f204490f3 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c Chris Wilson 2019-08-08  542  	intel_gt_pm_get(rq->engine->gt);
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  543  
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  544  	return i915_request_get(rq);
2a694feb93556e drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2018-04-03  545  }
2a694feb93556e drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2018-04-03  546  
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  547  static void schedule_out(struct i915_request *rq)
2a694feb93556e drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2018-04-03  548  {
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  549  	trace_i915_request_out(rq);
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  550  
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20 @551  	intel_context_inflight_dec(rq->hw_context);
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  552  	if (!intel_context_inflight_count(rq->hw_context))
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  553  		rq->hw_context->inflight = NULL;
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  554  
c7302f204490f3 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c Chris Wilson 2019-08-08  555  	intel_gt_pm_put(rq->engine->gt);
22b7a426bbe1eb drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2019-06-20  556  	i915_request_put(rq);
2a694feb93556e drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2018-04-03  557  }
2a694feb93556e drivers/gpu/drm/i915/intel_guc_submission.c       Chris Wilson 2018-04-03  558  

:::::: The code at line 541 was first introduced by commit
:::::: 22b7a426bbe1ebe1520f92da4cd1617d1e1b5fc4 drm/i915/execlists: Preempt-to-busy

:::::: TO: Chris Wilson <chris@chris-wilson.co.uk>
:::::: CC: Chris Wilson <chris@chris-wilson.co.uk>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28082 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock
  2019-08-12  9:10 [PATCH 8/8] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
@ 2019-08-12 11:13 ` Chris Wilson
  2019-08-12 15:29   ` kbuild test robot
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2019-08-12 11:13 UTC (permalink / raw)
  To: intel-gfx

If we only call process_csb() from the tasklet, then although we lose the
ability to bypass ksoftirqd interrupt processing on direct submission paths,
we can push it out of the irq-off spinlock.

The penalty is that we then allow schedule_out to be called concurrently
with schedule_in requiring us to handle the usage count (baked into the
pointer itself) atomically.

As we do kick the tasklets (via local_bh_enable()) after our submission,
there is a possibility there to see if we can pull the local softirq
processing back from the ksoftirqd.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
We have to restrict enable_timeslice() to only consider the information
under its control (i.e. execlists->active[])
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |   4 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 130 +++++++++++-------
 drivers/gpu/drm/i915/i915_utils.h             |  20 ++-
 4 files changed, 94 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a632b20ec4d8..d8ce266c049f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -41,9 +41,7 @@ struct intel_context {
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
 #define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
-#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
-#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
-#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
+#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
 
 	struct i915_address_space *vm;
 	struct i915_gem_context *gem_context;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c7b241417ee1..13a569907c3d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1459,7 +1459,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		for (port = execlists->pending; (rq = *port); port++) {
 			/* Exclude any contexts already counted in active */
-			if (intel_context_inflight_count(rq->hw_context) == 1)
+			if (!intel_context_inflight_count(rq->hw_context))
 				engine->stats.active++;
 		}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5c26c4ae139b..945f3acc2e75 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -547,27 +547,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
+
+	intel_context_get(ce);
+
+	intel_gt_pm_get(engine->gt);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+	intel_engine_context_in(engine);
+
+	return engine;
+}
+
 static inline struct i915_request *
 execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	struct intel_context *ce = rq->hw_context;
-	int count;
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *old;
 
+	GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
 	trace_i915_request_in(rq, idx);
 
-	count = intel_context_inflight_count(ce);
-	if (!count) {
-		intel_context_get(ce);
-		ce->inflight = rq->engine;
-
-		intel_gt_pm_get(ce->inflight->gt);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-		intel_engine_context_in(ce->inflight);
-	}
+	old = READ_ONCE(ce->inflight);
+	do {
+		if (!old) {
+			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			break;
+		}
+	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
 
-	intel_context_inflight_inc(ce);
 	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
 	return i915_request_get(rq);
 }
 
@@ -581,35 +593,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 }
 
 static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq)
 {
-	struct intel_context *ce = rq->hw_context;
+	struct intel_engine_cs * const engine = rq->engine;
+	struct intel_context * const ce = rq->hw_context;
 
-	GEM_BUG_ON(!intel_context_inflight_count(ce));
+	intel_engine_context_out(engine);
+	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	intel_gt_pm_put(engine->gt);
 
-	trace_i915_request_out(rq);
+	/*
+	 * If this is part of a virtual engine, its next request may
+	 * have been blocked waiting for access to the active context.
+	 * We have to kick all the siblings again in case we need to
+	 * switch (e.g. the next request is not runnable on this
+	 * engine). Hopefully, we will already have submitted the next
+	 * request before the tasklet runs and do not need to rebuild
+	 * each virtual tree and kick everyone again.
+	 */
+	if (ce->engine != engine)
+		kick_siblings(rq, ce);
 
-	intel_context_inflight_dec(ce);
-	if (!intel_context_inflight_count(ce)) {
-		intel_engine_context_out(ce->inflight);
-		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
-		intel_gt_pm_put(ce->inflight->gt);
+	intel_context_put(ce);
+}
 
-		/*
-		 * If this is part of a virtual engine, its next request may
-		 * have been blocked waiting for access to the active context.
-		 * We have to kick all the siblings again in case we need to
-		 * switch (e.g. the next request is not runnable on this
-		 * engine). Hopefully, we will already have submitted the next
-		 * request before the tasklet runs and do not need to rebuild
-		 * each virtual tree and kick everyone again.
-		 */
-		ce->inflight = NULL;
-		if (rq->engine != ce->engine)
-			kick_siblings(rq, ce);
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+	struct intel_context * const ce = rq->hw_context;
+	struct intel_engine_cs *cur, *old;
 
-		intel_context_put(ce);
-	}
+	trace_i915_request_out(rq);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+	old = READ_ONCE(ce->inflight);
+	do
+		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+	while (!try_cmpxchg(&ce->inflight, &old, cur));
+	if (!cur)
+		__execlists_schedule_out(rq);
 
 	i915_request_put(rq);
 }
@@ -684,6 +706,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
 
 	trace_ports(execlists, msg, execlists->pending);
 
+	if (!execlists->pending[0])
+		return false;
+
 	if (execlists->pending[execlists_num_ports(execlists)])
 		return false;
 
@@ -944,9 +969,21 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 static bool
 enable_timeslice(struct intel_engine_cs *engine)
 {
-	struct i915_request *last = last_active(&engine->execlists);
+	struct i915_request * const *port;
+	int hint;
+
+	port = engine->execlists.active;
+	while (port[0] && i915_request_completed(port[0]))
+		port++;
+	if (!port[0])
+		return false;
 
-	return last && need_timeslice(engine, last);
+	hint = engine->execlists.queue_priority_hint;
+	if (port[1])
+		hint = max(rq_prio(port[1]), hint);
+
+	/* Compare the two end-points as an unlocked approximation */
+	return hint >= effective_prio(port[0]);
 }
 
 static void record_preemption(struct intel_engine_execlists *execlists)
@@ -1356,7 +1393,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->active.lock);
 	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
@@ -1427,15 +1463,14 @@ static void process_csb(struct intel_engine_cs *engine)
 				       execlists->pending,
 				       execlists_num_ports(execlists) *
 				       sizeof(*execlists->pending));
-			execlists->pending[0] = NULL;
-
-			trace_ports(execlists, "promoted", execlists->active);
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
 
 			if (!inject_preempt_hang(execlists))
 				ring_set_paused(engine, 0);
+
+			WRITE_ONCE(execlists->pending[0], NULL);
 			break;
 
 		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1479,8 +1514,6 @@ static void process_csb(struct intel_engine_cs *engine)
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
 	lockdep_assert_held(&engine->active.lock);
-
-	process_csb(engine);
 	if (!engine->execlists.pending[0])
 		execlists_dequeue(engine);
 }
@@ -1494,9 +1527,12 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->active.lock, flags);
-	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->active.lock, flags);
+	process_csb(engine);
+	if (!engine->execlists.pending[0]) {
+		spin_lock_irqsave(&engine->active.lock, flags);
+		__execlists_submission_tasklet(engine);
+		spin_unlock_irqrestore(&engine->active.lock, flags);
+	}
 }
 
 static void execlists_submission_timer(struct timer_list *timer)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d652ba5d2320..562f756da421 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -161,17 +161,15 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
-#define ptr_count_dec(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(--__v);					\
-} while (0)
-
-#define ptr_count_inc(p_ptr) do {					\
-	typeof(p_ptr) __p = (p_ptr);					\
-	unsigned long __v = (unsigned long)(*__p);			\
-	*__p = (typeof(*p_ptr))(++__v);					\
-} while (0)
+#define ptr_dec(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v - 1);						\
+})
+
+#define ptr_inc(ptr) ({							\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(typeof(ptr))(__v + 1);						\
+})
 
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
-- 
2.23.0.rc1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-08-16 16:37 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-16  9:24 [PATCH 1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
2019-08-16  9:24 ` [PATCH 2/3] drm/i915/gt: Mark context->active_count as protected by timeline->mutex Chris Wilson
2019-08-16 11:48   ` Mika Kuoppala
2019-08-16  9:24 ` [PATCH 3/3] drm/i915: Markup expected timeline locks for i915_active Chris Wilson
2019-08-16 12:02   ` Mika Kuoppala
2019-08-16 12:06     ` Chris Wilson
2019-08-16 11:05 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Patchwork
2019-08-16 11:26 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-08-16 11:28   ` Chris Wilson
2019-08-16 11:38 ` [PATCH 1/3] " Mika Kuoppala
2019-08-16 11:49 ` [PATCH] " Chris Wilson
2019-08-16 16:16 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with drm/i915/execlists: Lift process_csb() out of the irq-off spinlock (rev2) Patchwork
2019-08-16 16:37 ` ✗ Fi.CI.BAT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2019-08-12  9:10 [PATCH 8/8] drm/i915/execlists: Lift process_csb() out of the irq-off spinlock Chris Wilson
2019-08-12 11:13 ` [PATCH] " Chris Wilson
2019-08-12 15:29   ` kbuild test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.