* [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine
@ 2021-02-07 15:03 Chris Wilson
  2021-02-07 15:03 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-07 15:03 UTC (permalink / raw)
  To: intel-gfx

Claim the submit_request vfunc as the entry point into the scheduler
backend for ready requests.
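
For reference, the submission path now reduces to something like the
following sketch (illustrative only, not part of the patch;
submit_ready_request() is a made-up helper name):

static void submit_ready_request(struct i915_request *rq)
{
	struct i915_sched *se = i915_request_get_scheduler(rq);

	/*
	 * The vfunc now lives on the scheduler rather than the engine.
	 * As before, it is called from atomic context with irqs
	 * disabled and must be irq safe.
	 */
	se->submit_request(rq);
}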

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h         |  8 --------
 drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 11 ++++++-----
 drivers/gpu/drm/i915/gt/intel_reset.c                |  2 +-
 drivers/gpu/drm/i915/gt/intel_ring_submission.c      |  4 ++--
 drivers/gpu/drm/i915/gt/mock_engine.c                |  4 +++-
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c    |  2 +-
 drivers/gpu/drm/i915/i915_request.c                  |  2 +-
 drivers/gpu/drm/i915/i915_scheduler.c                |  2 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h          |  9 +++++++++
 drivers/gpu/drm/i915/selftests/i915_request.c        |  3 +--
 10 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d5f917462f0e..7efa6290cc3e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -417,14 +417,6 @@ struct intel_engine_cs {
 						 u32 *cs);
 	unsigned int	emit_fini_breadcrumb_dw;
 
-	/* Pass the request to the hardware queue (e.g. directly into
-	 * the legacy ringbuffer or to the end of an execlist).
-	 *
-	 * This is called from an atomic context with irqs disabled; must
-	 * be irq safe.
-	 */
-	void		(*submit_request)(struct i915_request *rq);
-
 	/*
 	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
 	 * request down to the bonded pairs.
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f8dca5f2f9b2..02aa3eba4ebb 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -484,7 +484,7 @@ resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
 
 	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 	WRITE_ONCE(rq->engine, &ve->base);
-	ve->base.submit_request(rq);
+	ve->base.sched.submit_request(rq);
 
 	spin_unlock_irq(&se->lock);
 }
@@ -2763,7 +2763,7 @@ static bool can_preempt(struct intel_engine_cs *engine)
 
 static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = i915_request_enqueue;
+	engine->sched.submit_request = i915_request_enqueue;
 	engine->sched.tasklet.callback = execlists_submission_tasklet;
 }
 
@@ -3231,7 +3231,7 @@ static void virtual_submit_request(struct i915_request *rq)
 		     rq->fence.context,
 		     rq->fence.seqno);
 
-	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+	GEM_BUG_ON(ve->base.sched.submit_request != virtual_submit_request);
 
 	spin_lock_irqsave(&se->lock, flags);
 
@@ -3345,12 +3345,10 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
 
-	ve->base.submit_request = virtual_submit_request;
 	ve->base.bond_execute = virtual_bond_execute;
 
 	INIT_LIST_HEAD(virtual_queue(ve));
 	ve->base.execlists.queue_priority_hint = INT_MIN;
-	tasklet_setup(&ve->base.sched.tasklet, virtual_submission_tasklet);
 
 	intel_context_init(&ve->context, &ve->base);
 
@@ -3431,6 +3429,9 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
 			ve->base.mask,
 			ENGINE_VIRTUAL);
 
+	ve->base.sched.submit_request = virtual_submit_request;
+	tasklet_setup(&ve->base.sched.tasklet, virtual_submission_tasklet);
+
 	virtual_engine_initial_hint(ve);
 	return &ve->context;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index bf5b9f303a68..990cb4adbb9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -820,7 +820,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
 		__intel_gt_reset(gt, ALL_ENGINES);
 
 	for_each_engine(engine, gt, id)
-		engine->submit_request = nop_submit_request;
+		engine->sched.submit_request = nop_submit_request;
 
 	/*
 	 * Make sure no request can slip through without getting completed by
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 4a7d3420cc9d..cf3bbcbe7520 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -966,12 +966,12 @@ static void gen6_bsd_submit_request(struct i915_request *request)
 
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = i9xx_submit_request;
+	engine->sched.submit_request = i9xx_submit_request;
 }
 
 static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = gen6_bsd_submit_request;
+	engine->sched.submit_request = gen6_bsd_submit_request;
 }
 
 static void ring_release(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 2081deed94b7..5662f7c2f719 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -301,7 +301,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.request_alloc = mock_request_alloc;
 	engine->base.emit_flush = mock_emit_flush;
 	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
-	engine->base.submit_request = mock_submit_request;
+
+	engine->base.sched.submit_request = mock_submit_request;
 
 	engine->base.reset.prepare = mock_reset_prepare;
 	engine->base.reset.rewind = mock_reset_rewind;
@@ -332,6 +333,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
 			engine->name,
 			engine->mask,
 			ENGINE_MOCK);
+	engine->sched.submit_request = mock_submit_request;
 
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index cf99715e194d..c66c867ada23 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -558,7 +558,7 @@ static int guc_resume(struct intel_engine_cs *engine)
 
 static void guc_set_default_submission(struct intel_engine_cs *engine)
 {
-	engine->submit_request = i915_request_enqueue;
+	engine->sched.submit_request = i915_request_enqueue;
 }
 
 static void guc_release(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1b52dcaa023d..c03d3cedf497 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -700,7 +700,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 		 * proceeding.
 		 */
 		rcu_read_lock();
-		request->engine->submit_request(request);
+		i915_request_get_scheduler(request)->submit_request(request);
 		rcu_read_unlock();
 		break;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index ba308e937109..e8db7e614ff5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -132,6 +132,8 @@ void i915_sched_init(struct i915_sched *se,
 	se->queue = RB_ROOT_CACHED;
 
 	init_ipi(&se->ipi);
+
+	se->submit_request = i915_request_enqueue;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 3e2e47298bc6..2d746af501d6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -28,6 +28,15 @@ struct i915_sched {
 
 	unsigned long mask; /* available scheduling channels */
 
+	/*
+	 * Pass the request to the submission backend (e.g. directly into
+	 * the legacy ringbuffer, or to the end of an execlist, or to the GuC).
+	 *
+	 * This is called from an atomic context with irqs disabled; must
+	 * be irq safe.
+	 */
+	void (*submit_request)(struct i915_request *rq);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 39c619bccb74..8035ea7565ed 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -242,10 +242,9 @@ static int igt_request_rewind(void *arg)
 	i915_request_get(vip);
 	i915_request_add(vip);
 	rcu_read_lock();
-	request->engine->submit_request(request);
+	i915_request_get_scheduler(request)->submit_request(request);
 	rcu_read_unlock();
 
-
 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
 		pr_err("timed out waiting for high priority request\n");
 		goto err;
-- 
2.20.1

* [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler
  2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
@ 2021-02-07 15:03 ` Chris Wilson
  2021-02-07 15:03 ` [Intel-gfx] [CI 3/4] drm/i915: Show execlists queues when dumping state Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-07 15:03 UTC (permalink / raw)
  To: intel-gfx

Since finding the currently active request starts by walking the
scheduler lists under the scheduler lock, move the routine to the
scheduler.

v2: Wrap se->active_request() with i915_sched_get_active_request()
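
For illustration, callers that need the currently executing request now
sample it under the scheduler lock (a minimal sketch mirroring the
heartbeat and reset hunks below; sample_active() is a hypothetical
helper):

static struct i915_request *sample_active(struct i915_sched *se)
{
	struct i915_request *rq;

	/* the active_request() hook must be called under se->lock */
	spin_lock_irq(&se->lock);
	rq = i915_sched_get_active_request(se);
	spin_unlock_irq(&se->lock);

	return rq;
}

Backends with out-of-order execution (execlists) install their own
se->active_request(); everyone else falls back to the default in-order
walk of se->requests added in i915_scheduler.c.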

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |   3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 112 +++---------------
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  32 ++++-
 .../drm/i915/gt/intel_execlists_submission.c  |  17 ++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  23 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c         |  18 ++-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   4 +-
 drivers/gpu/drm/i915/i915_request.c           |  71 +----------
 drivers/gpu/drm/i915/i915_request.h           |  16 +++
 drivers/gpu/drm/i915/i915_scheduler.c         |  24 ++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  12 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |   2 +
 12 files changed, 143 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 52bba16c62e8..c530839627bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -230,9 +230,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
 				   ktime_t *now);
 
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 void intel_engine_init_active(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3b299339fb62..f6596d454d46 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1175,31 +1175,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 	}
 }
 
-static bool ring_is_idle(struct intel_engine_cs *engine)
-{
-	bool idle = true;
-
-	if (I915_SELFTEST_ONLY(!engine->mmio_base))
-		return true;
-
-	if (!intel_engine_pm_get_if_awake(engine))
-		return true;
-
-	/* First check that no commands are left in the ring */
-	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
-	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
-		idle = false;
-
-	/* No bit for gen2, so assume the CS parser is idle */
-	if (INTEL_GEN(engine->i915) > 2 &&
-	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
-		idle = false;
-
-	intel_engine_pm_put(engine);
-
-	return idle;
-}
-
 /**
  * intel_engine_is_idle() - Report if the engine has finished process all work
  * @engine: the intel_engine_cs
@@ -1210,14 +1185,12 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_request *rq = NULL;
 
 	/* More white lies, if wedged, hw state is inconsistent */
 	if (intel_gt_is_wedged(engine->gt))
 		return true;
 
-	if (!intel_engine_pm_is_awake(engine))
-		return true;
-
 	/* Waiting to drain ELSP? */
 	synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq);
 	i915_sched_flush(se);
@@ -1226,8 +1199,16 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (!i915_sched_is_idle(se))
 		return false;
 
-	/* Ring stopped? */
-	return ring_is_idle(engine);
+	/* Execution complete? */
+	if (intel_engine_pm_get_if_awake(engine)) {
+		spin_lock_irq(&se->lock);
+		rq = i915_sched_get_active_request(se);
+		spin_unlock_irq(&se->lock);
+
+		intel_engine_pm_put(engine);
+	}
+
+	return !rq;
 }
 
 bool intel_engines_are_idle(struct intel_gt *gt)
@@ -1284,7 +1265,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(struct i915_request *rq)
+static struct intel_timeline *get_timeline(const struct i915_request *rq)
 {
 	struct intel_timeline *tl;
 
@@ -1512,7 +1493,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 {
 	void *ring;
 	int size;
@@ -1597,7 +1579,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	ktime_t dummy;
@@ -1638,8 +1620,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		struct intel_timeline *tl = get_timeline(rq);
 
@@ -1671,6 +1654,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
@@ -1719,66 +1703,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 	return ktime_add(total, start);
 }
 
-static bool match_ring(struct i915_request *rq)
-{
-	u32 ring = ENGINE_READ(rq->engine, RING_START);
-
-	return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *request, *active = NULL;
-
-	/*
-	 * We are called by the error capture, reset and to dump engine
-	 * state at random points in time. In particular, note that neither is
-	 * crucially ordered with an interrupt. After a hang, the GPU is dead
-	 * and we assume that no more writes can happen (we waited long enough
-	 * for all writes that were in transaction to be flushed) - adding an
-	 * extra delay for a recent interrupt is pointless. Hence, we do
-	 * not need an engine->irq_seqno_barrier() before the seqno reads.
-	 * At all other times, we must assume the GPU is still running, but
-	 * we only care about the snapshot of this moment.
-	 */
-	lockdep_assert_held(&se->lock);
-
-	rcu_read_lock();
-	request = execlists_active(&engine->execlists);
-	if (request) {
-		struct intel_timeline *tl = request->context->timeline;
-
-		list_for_each_entry_from_reverse(request, &tl->requests, link) {
-			if (__i915_request_is_complete(request))
-				break;
-
-			active = request;
-		}
-	}
-	rcu_read_unlock();
-	if (active)
-		return active;
-
-	list_for_each_entry(request, &se->requests, sched.link) {
-		if (__i915_request_is_complete(request))
-			continue;
-
-		if (!__i915_request_has_started(request))
-			continue;
-
-		/* More than one preemptible request may match! */
-		if (!match_ring(request))
-			continue;
-
-		active = request;
-		break;
-	}
-
-	return active;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index b6dbd1150ba9..0b062fad1837 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -78,6 +78,21 @@ static void show_heartbeat(const struct i915_request *rq,
 			  rq->sched.attr.priority);
 }
 
+static bool is_wait(struct intel_engine_cs *engine)
+{
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_request *rq;
+	bool wait = true;
+
+	spin_lock_irq(&se->lock);
+	rq = i915_sched_get_active_request(se);
+	if (rq)
+		wait = !i915_sw_fence_signaled(&rq->submit);
+	spin_unlock_irq(&se->lock);
+
+	return wait;
+}
+
 static void heartbeat(struct work_struct *wrk)
 {
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
@@ -92,6 +107,9 @@ static void heartbeat(struct work_struct *wrk)
 
 	rq = engine->heartbeat.systole;
 	if (rq && i915_request_completed(rq)) {
+		ENGINE_TRACE(engine,
+			     "heartbeat " RQ_FMT " completed\n",
+			     RQ_ARG(rq));
 		i915_request_put(rq);
 		engine->heartbeat.systole = NULL;
 	}
@@ -110,7 +128,7 @@ static void heartbeat(struct work_struct *wrk)
 				rq->emitted_jiffies + msecs_to_jiffies(delay)))
 			goto out;
 
-		if (!i915_sw_fence_signaled(&rq->submit)) {
+		if (!i915_sw_fence_signaled(&rq->submit) && is_wait(engine)) {
 			/*
 			 * Not yet submitted, system is stalled.
 			 *
@@ -121,6 +139,9 @@ static void heartbeat(struct work_struct *wrk)
 			 * but all other contexts, including the kernel
 			 * context are stuck waiting for the signal.
 			 */
+			ENGINE_TRACE(engine,
+				     "heartbeat " RQ_FMT " pending\n",
+				     RQ_ARG(rq));
 		} else if (rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
@@ -134,10 +155,18 @@ static void heartbeat(struct work_struct *wrk)
 			if (rq->sched.attr.priority >= attr.priority)
 				attr.priority = I915_PRIORITY_BARRIER;
 
+			ENGINE_TRACE(engine,
+				     "bumping heartbeat " RQ_FMT " prio:%d\n",
+				     RQ_ARG(rq), attr.priority);
+
 			local_bh_disable();
 			i915_request_set_priority(rq, attr.priority);
 			local_bh_enable();
 		} else {
+			ENGINE_TRACE(engine,
+				     "heartbeat " RQ_FMT " stuck\n",
+				     RQ_ARG(rq));
+
 			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 				show_heartbeat(rq, engine);
 
@@ -169,6 +198,7 @@ static void heartbeat(struct work_struct *wrk)
 	if (IS_ERR(rq))
 		goto unlock;
 
+	ENGINE_TRACE(engine, "heartbeat " RQ_FMT " started\n", RQ_ARG(rq));
 	heartbeat_commit(rq, &attr);
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 02aa3eba4ebb..f110348f2ae2 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2376,7 +2376,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
-	GEM_BUG_ON(execlists_active(&engine->execlists));
+	GEM_BUG_ON(*engine->execlists.active);
 
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -2752,6 +2752,20 @@ static void execlists_park(struct intel_engine_cs *engine)
 	cancel_timer(&engine->execlists.preempt);
 }
 
+static struct i915_request *
+execlists_active_request(const struct i915_sched *se)
+{
+	const struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *rq;
+
+	rq = execlists_active(&engine->execlists);
+	if (rq)
+		rq = active_request(rq->context->timeline, rq);
+
+	return rq;
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2888,6 +2902,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 
+	engine->sched.active_request = execlists_active_request;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index cf3bbcbe7520..282089d64789 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -324,20 +324,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
 static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *pos, *rq;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
 
-	rq = NULL;
 	spin_lock_irqsave(&se->lock, flags);
-	rcu_read_lock();
-	list_for_each_entry(pos, &se->requests, sched.link) {
-		if (!__i915_request_is_complete(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
 
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -361,6 +352,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	 * subsequent hangs.
 	 */
 
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		/*
 		 * Try to restore the logical GPU state to match the
@@ -379,9 +371,20 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 		 */
 		__i915_request_reset(rq, stalled);
 
+		ENGINE_TRACE(engine,
+			     "active (guilty:%s) request, replaying HEAD:%x\n",
+			     yesno(stalled), rq->head);
+
 		GEM_BUG_ON(rq->ring != engine->legacy.ring);
 		head = rq->head;
 	} else {
+		ENGINE_TRACE(engine, "idle reset, clearing ring, HEAD:%x\n",
+			     engine->legacy.ring->tail);
+
+		/* Sometimes the GPU just dies; cleanup afterwards */
+		list_for_each_entry(rq, &se->requests, sched.link)
+			i915_request_put(i915_request_mark_eio(rq));
+
 		head = engine->legacy.ring->tail;
 	}
 	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f8c50195b330..291f5b818925 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,15 +1262,11 @@ static bool record_context(struct i915_gem_context_coredump *e,
 	struct i915_gem_context *ctx;
 	bool simulated;
 
-	rcu_read_lock();
-
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
-	if (!ctx) {
-		rcu_read_unlock();
+	if (!ctx)
 		return true;
-	}
 
 	if (I915_SELFTEST_ONLY(!ctx->client)) {
 		strcpy(e->comm, "[kernel]");
@@ -1279,8 +1275,6 @@ static bool record_context(struct i915_gem_context_coredump *e,
 		e->pid = pid_nr(i915_drm_client_pid(ctx->client));
 	}
 
-	rcu_read_unlock();
-
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
@@ -1368,12 +1362,14 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	struct intel_engine_capture_vma *vma = NULL;
 
+	rcu_read_lock();
 	ee->simulated |= record_context(&ee->context, rq);
+	rcu_read_unlock();
 	if (ee->simulated)
 		return NULL;
 
@@ -1436,19 +1432,21 @@ capture_engine(struct intel_engine_cs *engine,
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	unsigned long flags;
 
 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
 	if (!ee)
 		return NULL;
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq)
 		capture = intel_engine_coredump_add_request(ee, rq,
 							    ATOMIC_MAYFAIL);
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 	if (!capture) {
 		kfree(ee);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1764fd254df3..2d8debabfe28 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -235,7 +235,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp);
 
 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
@@ -299,7 +299,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 static inline struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c03d3cedf497..792dd0bbea3b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
-	struct i915_request * const *port, *rq;
-	bool inflight = false;
-
-	if (!i915_request_is_ready(signal))
-		return false;
-
-	/*
-	 * Even if we have unwound the request, it may still be on
-	 * the GPU (preempt-to-busy). If that request is inside an
-	 * unpreemptible critical section, it will not be removed. Some
-	 * GPU functions may even be stuck waiting for the paired request
-	 * (__await_execution) to be submitted and cannot be preempted
-	 * until the bond is executing.
-	 *
-	 * As we know that there are always preemption points between
-	 * requests, we know that only the currently executing request
-	 * may be still active even though we have cleared the flag.
-	 * However, we can't rely on our tracking of ELSP[0] to know
-	 * which request is currently active and so maybe stuck, as
-	 * the tracking maybe an event behind. Instead assume that
-	 * if the context is still inflight, then it is still active
-	 * even if the active flag has been cleared.
-	 *
-	 * To further complicate matters, if there a pending promotion, the HW
-	 * may either perform a context switch to the second inflight execlists,
-	 * or it may switch to the pending set of execlists. In the case of the
-	 * latter, it may send the ACK and we process the event copying the
-	 * pending[] over top of inflight[], _overwriting_ our *active. Since
-	 * this implies the HW is arbitrating and not struck in *active, we do
-	 * not worry about complete accuracy, but we do require no read/write
-	 * tearing of the pointer [the read of the pointer must be valid, even
-	 * as the array is being overwritten, for which we require the writes
-	 * to avoid tearing.]
-	 *
-	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
-	 * the value at *execlists->active. This is fine. The promotion implies
-	 * that we received an ACK from the HW, and so the context is not
-	 * stuck -- if we do not see ourselves in *active, the inflight status
-	 * is valid. If instead we see ourselves being copied into *active,
-	 * we are inflight and may signal the callback.
-	 */
-	if (!intel_context_inflight(signal->context))
-		return false;
-
-	rcu_read_lock();
-	for (port = __engine_active(signal->engine);
-	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
-	     port++) {
-		if (rq->context == signal->context) {
-			inflight = i915_seqno_passed(rq->fence.seqno,
-						     signal->fence.seqno);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return inflight;
-}
-
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
 	 * the completed/retired request.
 	 */
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
-		if (i915_request_is_active(signal) ||
-		    __request_in_flight(signal))
+		if (i915_request_is_executing(signal))
 			__notify_execute_cb_imm(signal);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c41582b96b46..7e722ccc9c4b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -629,4 +629,20 @@ static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 	return intel_engine_has_scheduler(rq->engine);
 }
 
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+	/* Is the request presently on the HW execution queue? */
+	if (i915_request_is_active(rq))
+		return true;
+
+	/*
+	 * However, if it is not presently on the HW execution queue, it
+	 * may have been recently removed from the queue, but is in fact
+	 * still executing until the HW has completed a preemption. We
+	 * need to double check with the backend for it to query the HW
+	 * need to double check with the backend, which can query the HW,
+	 * to see if the request is still executing.
+	return intel_context_inflight(rq->context);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e8db7e614ff5..c1674a84ea74 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -112,6 +112,29 @@ static void init_ipi(struct i915_sched_ipi *ipi)
 	ipi->list = NULL;
 }
 
+static struct i915_request *
+i915_sched_default_active_request(const struct i915_sched *se)
+{
+	struct i915_request *rq, *active = NULL;
+
+	/*
+	 * We assume the simplest in-order execution queue with no preemption,
+	 * i.e. the order of se->requests matches exactly the execution order
+	 * of the HW.
+	 */
+	list_for_each_entry(rq, &se->requests, sched.link) {
+		if (__i915_request_is_complete(rq))
+			continue;
+
+		if (__i915_request_has_started(rq))
+			active = rq;
+
+		break;
+	}
+
+	return active;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -134,6 +157,7 @@ void i915_sched_init(struct i915_sched *se,
 	init_ipi(&se->ipi);
 
 	se->submit_request = i915_request_enqueue;
+	se->active_request = i915_sched_default_active_request;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1803fc37bada..1095f2e9fb1b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -138,6 +138,18 @@ static inline void i915_sched_flush(struct i915_sched *se)
 	__i915_sched_flush(se, true);
 }
 
+/* Find the currently executing request on the backend */
+static inline struct i915_request *
+i915_sched_get_active_request(const struct i915_sched *se)
+{
+	lockdep_assert_held(&se->lock);
+
+	if (se->active_request)
+		return se->active_request(se);
+
+	return NULL;
+}
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2d746af501d6..05579f61e6bd 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -37,6 +37,8 @@ struct i915_sched {
 	 */
 	void (*submit_request)(struct i915_request *rq);
 
+	struct i915_request *(*active_request)(const struct i915_sched *se);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

* [Intel-gfx] [CI 3/4] drm/i915: Show execlists queues when dumping state
  2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
  2021-02-07 15:03 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
@ 2021-02-07 15:03 ` Chris Wilson
  2021-02-07 15:03 ` [Intel-gfx] [CI 4/4] drm/i915: Wrap i915_request_use_semaphores() Chris Wilson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-07 15:03 UTC (permalink / raw)
  To: intel-gfx

Move the scheduler pretty printer out of the execlists register
state and push it to the scheduler.

v2: It's not common to all, so shove it out of intel_engine_cs and
split it between scheduler front/back ends
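
For illustration, the resulting split looks roughly like this (a sketch
of the pattern in the hunks below, not verbatim code):

	/* backend half: installed at init, e.g. by execlists */
	engine->sched.show = execlists_show;

	/* front half: prints the common queues, then defers to se->show */
	i915_sched_show(m, intel_engine_get_scheduler(engine),
			i915_request_show, 8);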

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 296 +++---------------
 .../drm/i915/gt/intel_execlists_submission.c  | 180 +++++++----
 drivers/gpu/drm/i915/i915_request.c           |   6 +
 drivers/gpu/drm/i915/i915_scheduler.c         | 172 ++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |   8 +
 drivers/gpu/drm/i915/i915_scheduler_types.h   |   9 +
 6 files changed, 356 insertions(+), 315 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f6596d454d46..577ebd4a324f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1265,49 +1265,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(const struct i915_request *rq)
-{
-	struct intel_timeline *tl;
-
-	/*
-	 * Even though we are holding the engine->active.lock here, there
-	 * is no control over the submission queue per-se and we are
-	 * inspecting the active state at a random point in time, with an
-	 * unknown queue. Play safe and make sure the timeline remains valid.
-	 * (Only being used for pretty printing, one extra kref shouldn't
-	 * cause a camel stampede!)
-	 */
-	rcu_read_lock();
-	tl = rcu_dereference(rq->timeline);
-	if (!kref_get_unless_zero(&tl->kref))
-		tl = NULL;
-	rcu_read_unlock();
-
-	return tl;
-}
-
-static int print_ring(char *buf, int sz, struct i915_request *rq)
-{
-	int len = 0;
-
-	if (!i915_request_signaled(rq)) {
-		struct intel_timeline *tl = get_timeline(rq);
-
-		len = scnprintf(buf, sz,
-				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
-				i915_ggtt_offset(rq->ring->vma),
-				tl ? tl->hwsp_offset : 0,
-				hwsp_seqno(rq),
-				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
-						      1000 * 1000));
-
-		if (tl)
-			intel_timeline_put(tl);
-	}
-
-	return len;
-}
-
 static void hexdump(struct drm_printer *m, const void *buf, size_t len)
 {
 	const size_t rowsize = 8 * sizeof(u32);
@@ -1337,205 +1294,69 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
 	}
 }
 
-static const char *repr_timer(const struct timer_list *t)
-{
-	if (!READ_ONCE(t->expires))
-		return "inactive";
-
-	if (timer_pending(t))
-		return "active";
-
-	return "expired";
-}
-
 static void intel_engine_print_registers(struct intel_engine_cs *engine,
 					 struct drm_printer *m)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct drm_i915_private *i915 = engine->i915;
 	u64 addr;
 
-	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
-		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
-	if (HAS_EXECLISTS(dev_priv)) {
-		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(i915, 4, 7))
+		drm_printf(m, "CCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+	if (HAS_EXECLISTS(i915)) {
+		drm_printf(m, "EL_STAT_HI: 0x%08x\n",
 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
-		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+		drm_printf(m, "EL_STAT_LO: 0x%08x\n",
 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
 	}
-	drm_printf(m, "\tRING_START: 0x%08x\n",
+	drm_printf(m, "RING_START: 0x%08x\n",
 		   ENGINE_READ(engine, RING_START));
-	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
+	drm_printf(m, "RING_HEAD:  0x%08x\n",
 		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
-	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
+	drm_printf(m, "RING_TAIL:  0x%08x\n",
 		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
-	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
+	drm_printf(m, "RING_CTL:   0x%08x%s\n",
 		   ENGINE_READ(engine, RING_CTL),
 		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
 	if (INTEL_GEN(engine->i915) > 2) {
-		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
+		drm_printf(m, "RING_MODE:  0x%08x%s\n",
 			   ENGINE_READ(engine, RING_MI_MODE),
 			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
 	}
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		drm_printf(m, "\tRING_IMR:   0x%08x\n",
+	if (INTEL_GEN(i915) >= 6) {
+		drm_printf(m, "RING_IMR:   0x%08x\n",
 			   ENGINE_READ(engine, RING_IMR));
-		drm_printf(m, "\tRING_ESR:   0x%08x\n",
+		drm_printf(m, "RING_ESR:   0x%08x\n",
 			   ENGINE_READ(engine, RING_ESR));
-		drm_printf(m, "\tRING_EMR:   0x%08x\n",
+		drm_printf(m, "RING_EMR:   0x%08x\n",
 			   ENGINE_READ(engine, RING_EMR));
-		drm_printf(m, "\tRING_EIR:   0x%08x\n",
+		drm_printf(m, "RING_EIR:   0x%08x\n",
 			   ENGINE_READ(engine, RING_EIR));
 	}
 
 	addr = intel_engine_get_active_head(engine);
-	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
+	drm_printf(m, "ACTHD:  0x%08x_%08x\n",
 		   upper_32_bits(addr), lower_32_bits(addr));
 	addr = intel_engine_get_last_batch_head(engine);
-	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
+	drm_printf(m, "BBADDR: 0x%08x_%08x\n",
 		   upper_32_bits(addr), lower_32_bits(addr));
-	if (INTEL_GEN(dev_priv) >= 8)
+	if (INTEL_GEN(i915) >= 8)
 		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
-	else if (INTEL_GEN(dev_priv) >= 4)
+	else if (INTEL_GEN(i915) >= 4)
 		addr = ENGINE_READ(engine, RING_DMA_FADD);
 	else
 		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
-	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
+	drm_printf(m, "DMA_FADDR: 0x%08x_%08x\n",
 		   upper_32_bits(addr), lower_32_bits(addr));
-	if (INTEL_GEN(dev_priv) >= 4) {
-		drm_printf(m, "\tIPEIR: 0x%08x\n",
+	if (INTEL_GEN(i915) >= 4) {
+		drm_printf(m, "IPEIR: 0x%08x\n",
 			   ENGINE_READ(engine, RING_IPEIR));
-		drm_printf(m, "\tIPEHR: 0x%08x\n",
+		drm_printf(m, "IPEHR: 0x%08x\n",
 			   ENGINE_READ(engine, RING_IPEHR));
 	} else {
-		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
-		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
+		drm_printf(m, "IPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
+		drm_printf(m, "IPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
 	}
-
-	if (intel_engine_uses_guc(engine)) {
-		/* nothing to print yet */
-	} else if (HAS_EXECLISTS(dev_priv)) {
-		struct i915_sched *se = intel_engine_get_scheduler(engine);
-		struct i915_request * const *port, *rq;
-		const u32 *hws =
-			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
-		const u8 num_entries = execlists->csb_size;
-		unsigned int idx;
-		u8 read, write;
-
-		drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
-			   yesno(test_bit(TASKLET_STATE_SCHED,
-					  &se->tasklet.state)),
-			   enableddisabled(!atomic_read(&se->tasklet.count)),
-			   repr_timer(&engine->execlists.preempt),
-			   repr_timer(&engine->execlists.timer));
-
-		read = execlists->csb_head;
-		write = READ_ONCE(*execlists->csb_write);
-
-		drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
-			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
-			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
-			   read, write, num_entries);
-
-		if (read >= num_entries)
-			read = 0;
-		if (write >= num_entries)
-			write = 0;
-		if (read > write)
-			write += num_entries;
-		while (read < write) {
-			idx = ++read % num_entries;
-			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
-				   idx, hws[idx * 2], hws[idx * 2 + 1]);
-		}
-
-		i915_sched_lock_bh(se);
-		rcu_read_lock();
-		for (port = execlists->active; (rq = *port); port++) {
-			char hdr[160];
-			int len;
-
-			len = scnprintf(hdr, sizeof(hdr),
-					"\t\tActive[%d]:  ccid:%08x%s%s, ",
-					(int)(port - execlists->active),
-					rq->context->lrc.ccid,
-					intel_context_is_closed(rq->context) ? "!" : "",
-					intel_context_is_banned(rq->context) ? "*" : "");
-			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
-			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
-			i915_request_show(m, rq, hdr, 0);
-		}
-		for (port = execlists->pending; (rq = *port); port++) {
-			char hdr[160];
-			int len;
-
-			len = scnprintf(hdr, sizeof(hdr),
-					"\t\tPending[%d]: ccid:%08x%s%s, ",
-					(int)(port - execlists->pending),
-					rq->context->lrc.ccid,
-					intel_context_is_closed(rq->context) ? "!" : "",
-					intel_context_is_banned(rq->context) ? "*" : "");
-			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
-			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
-			i915_request_show(m, rq, hdr, 0);
-		}
-		rcu_read_unlock();
-		i915_sched_unlock_bh(se);
-	} else if (INTEL_GEN(dev_priv) > 6) {
-		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
-			   ENGINE_READ(engine, RING_PP_DIR_BASE));
-		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
-			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
-		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
-			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
-	}
-}
-
-static void
-print_request_ring(struct drm_printer *m, const struct i915_request *rq)
-{
-	void *ring;
-	int size;
-
-	drm_printf(m,
-		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
-		   rq->head, rq->postfix, rq->tail,
-		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
-		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
-
-	size = rq->tail - rq->head;
-	if (rq->tail < rq->head)
-		size += rq->ring->size;
-
-	ring = kmalloc(size, GFP_ATOMIC);
-	if (ring) {
-		const void *vaddr = rq->ring->vaddr;
-		unsigned int head = rq->head;
-		unsigned int len = 0;
-
-		if (rq->tail < head) {
-			len = rq->ring->size - head;
-			memcpy(ring, vaddr + head, len);
-			head = 0;
-		}
-		memcpy(ring + len, vaddr + head, size - len);
-
-		hexdump(m, ring, size);
-		kfree(ring);
-	}
-}
-
-static unsigned long list_count(struct list_head *list)
-{
-	struct list_head *pos;
-	unsigned long count = 0;
-
-	list_for_each(pos, list)
-		count++;
-
-	return count;
 }
 
 static unsigned long read_ul(void *p, size_t x)
@@ -1565,9 +1386,9 @@ static void print_properties(struct intel_engine_cs *engine,
 	};
 	const struct pmap *p;
 
-	drm_printf(m, "\tProperties:\n");
+	drm_printf(m, "Properties:\n");
 	for (p = props; p->name; p++)
-		drm_printf(m, "\t\t%s: %lu [default %lu]\n",
+		drm_printf(m, "\t%s: %lu [default %lu]\n",
 			   p->name,
 			   read_ul(&engine->props, p->offset),
 			   read_ul(&engine->defaults, p->offset));
@@ -1578,10 +1399,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		       const char *header, ...)
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
-	unsigned long flags;
 	ktime_t dummy;
 
 	if (header) {
@@ -1595,78 +1414,41 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	if (intel_gt_is_wedged(engine->gt))
 		drm_printf(m, "*** WEDGED ***\n");
 
-	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
-	drm_printf(m, "\tBarriers?: %s\n",
+	drm_printf(m, "Awake? %d\n", atomic_read(&engine->wakeref.count));
+	drm_printf(m, "Barriers?: %s\n",
 		   yesno(!llist_empty(&engine->barrier_tasks)));
-	drm_printf(m, "\tLatency: %luus\n",
+	drm_printf(m, "Latency: %luus\n",
 		   ewma__engine_latency_read(&engine->latency));
 	if (intel_engine_supports_stats(engine))
-		drm_printf(m, "\tRuntime: %llums\n",
+		drm_printf(m, "Runtime: %llums\n",
 			   ktime_to_ms(intel_engine_get_busy_time(engine,
 								  &dummy)));
-	drm_printf(m, "\tForcewake: %x domains, %d active\n",
+	drm_printf(m, "Forcewake: %x domains, %d active\n",
 		   engine->fw_domain, READ_ONCE(engine->fw_active));
 
 	rcu_read_lock();
 	rq = READ_ONCE(engine->heartbeat.systole);
 	if (rq)
-		drm_printf(m, "\tHeartbeat: %d ms ago\n",
+		drm_printf(m, "Heartbeat: %d ms ago\n",
 			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
 	rcu_read_unlock();
-	drm_printf(m, "\tReset count: %d (global %d)\n",
+	drm_printf(m, "Reset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
 	print_properties(engine, m);
 
-	drm_printf(m, "\tRequests:\n");
+	i915_sched_show(m, intel_engine_get_scheduler(engine),
+			i915_request_show, 8);
 
-	rcu_read_lock();
-	spin_lock_irqsave(&se->lock, flags);
-	rq = i915_sched_get_active_request(se);
-	if (rq) {
-		struct intel_timeline *tl = get_timeline(rq);
-
-		i915_request_show(m, rq, "\t\tactive ", 0);
-
-		drm_printf(m, "\t\tring->start:  0x%08x\n",
-			   i915_ggtt_offset(rq->ring->vma));
-		drm_printf(m, "\t\tring->head:   0x%08x\n",
-			   rq->ring->head);
-		drm_printf(m, "\t\tring->tail:   0x%08x\n",
-			   rq->ring->tail);
-		drm_printf(m, "\t\tring->emit:   0x%08x\n",
-			   rq->ring->emit);
-		drm_printf(m, "\t\tring->space:  0x%08x\n",
-			   rq->ring->space);
-
-		if (tl) {
-			drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
-				   tl->hwsp_offset);
-			intel_timeline_put(tl);
-		}
-
-		print_request_ring(m, rq);
-
-		if (rq->context->lrc_reg_state) {
-			drm_printf(m, "Logical Ring Context:\n");
-			hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
-		}
-	}
-	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
-	spin_unlock_irqrestore(&se->lock, flags);
-	rcu_read_unlock();
-
-	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
+	drm_printf(m, "MMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
 	if (wakeref) {
 		intel_engine_print_registers(engine, m);
 		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
 	} else {
-		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
+		drm_printf(m, "Device is asleep; skipping register dump\n");
 	}
 
-	intel_execlists_show_requests(engine, m, i915_request_show, 8);
-
 	drm_printf(m, "HWSP:\n");
 	hexdump(m, engine->status_page.addr, PAGE_SIZE);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f110348f2ae2..85ff5fe861b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -199,6 +199,14 @@ struct virtual_engine {
 	struct intel_engine_cs *siblings[];
 };
 
+static void execlists_show(struct drm_printer *m,
+			   struct i915_sched *se,
+			   void (*show_request)(struct drm_printer *m,
+						const struct i915_request *rq,
+						const char *prefix,
+						int indent),
+			   unsigned int max);
+
 static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!intel_engine_is_virtual(engine));
@@ -2903,6 +2911,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	u32 base = engine->mmio_base;
 
 	engine->sched.active_request = execlists_active_request;
+	engine->sched.show = execlists_show;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
@@ -3519,75 +3528,72 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 	return 0;
 }
 
-void intel_execlists_show_requests(struct intel_engine_cs *engine,
-				   struct drm_printer *m,
-				   void (*show_request)(struct drm_printer *m,
-							const struct i915_request *rq,
-							const char *prefix,
-							int indent),
-				   unsigned int max)
+static const char *repr_timer(const struct timer_list *t)
 {
-	const struct intel_engine_execlists *execlists = &engine->execlists;
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	if (!READ_ONCE(t->expires))
+		return "inactive";
+
+	if (timer_pending(t))
+		return "active";
+
+	return "expired";
+}
+
+static int print_ring(char *buf, int sz, struct i915_request *rq)
+{
+	int len = 0;
+
+	rcu_read_lock();
+	if (!i915_request_signaled(rq)) {
+		struct intel_timeline *tl = rcu_dereference(rq->timeline);
+
+		len = scnprintf(buf, sz,
+				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
+				i915_ggtt_offset(rq->ring->vma),
+				tl ? tl->hwsp_offset : 0,
+				hwsp_seqno(rq),
+				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
+						      1000 * 1000));
+	}
+	rcu_read_unlock();
+
+	return len;
+}
+
+static void execlists_show(struct drm_printer *m,
+			   struct i915_sched *se,
+			   void (*show_request)(struct drm_printer *m,
+						const struct i915_request *rq,
+						const char *prefix,
+						int indent),
+			   unsigned int max)
+{
+	const struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	const struct intel_engine_execlists *el = &engine->execlists;
+	const u64 *hws = el->csb_status;
+	const u8 num_entries = el->csb_size;
+	struct i915_request * const *port;
 	struct i915_request *rq, *last;
-	unsigned long flags;
+	intel_wakeref_t wakeref;
 	unsigned int count;
 	struct rb_node *rb;
+	unsigned int idx;
+	u8 read, write;
 
-	spin_lock_irqsave(&se->lock, flags);
+	wakeref = intel_runtime_pm_get(engine->uncore->rpm);
+	rcu_read_lock();
 
 	last = NULL;
 	count = 0;
-	list_for_each_entry(rq, &se->requests, sched.link) {
-		if (count++ < max - 1)
-			show_request(m, rq, "\t\t", 0);
-		else
-			last = rq;
-	}
-	if (last) {
-		if (count > max) {
-			drm_printf(m,
-				   "\t\t...skipping %d executing requests...\n",
-				   count - max);
-		}
-		show_request(m, last, "\t\t", 0);
-	}
-
-	if (execlists->queue_priority_hint != INT_MIN)
-		drm_printf(m, "\t\tQueue priority hint: %d\n",
-			   READ_ONCE(execlists->queue_priority_hint));
-
-	last = NULL;
-	count = 0;
-	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
-		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
-
-		priolist_for_each_request(rq, p) {
-			if (count++ < max - 1)
-				show_request(m, rq, "\t\t", 0);
-			else
-				last = rq;
-		}
-	}
-	if (last) {
-		if (count > max) {
-			drm_printf(m,
-				   "\t\t...skipping %d queued requests...\n",
-				   count - max);
-		}
-		show_request(m, last, "\t\t", 0);
-	}
-
-	last = NULL;
-	count = 0;
-	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+	for (rb = rb_first_cached(&el->virtual); rb; rb = rb_next(rb)) {
 		struct virtual_engine *ve =
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 		struct i915_request *rq = READ_ONCE(ve->request);
 
 		if (rq) {
 			if (count++ < max - 1)
-				show_request(m, rq, "\t\t", 0);
+				show_request(m, rq, "\t", 0);
 			else
 				last = rq;
 		}
@@ -3595,13 +3601,71 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	if (last) {
 		if (count > max) {
 			drm_printf(m,
-				   "\t\t...skipping %d virtual requests...\n",
+				   "\t...skipping %d virtual requests...\n",
 				   count - max);
 		}
-		show_request(m, last, "\t\t", 0);
+		show_request(m, last, "\t", 0);
 	}
 
-	spin_unlock_irqrestore(&se->lock, flags);
+	read = el->csb_head;
+	write = READ_ONCE(*el->csb_write);
+
+	drm_printf(m, "Execlist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
+		   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
+		   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
+		   read, write, num_entries);
+
+	if (read >= num_entries)
+		read = 0;
+	if (write >= num_entries)
+		write = 0;
+	if (read > write)
+		write += num_entries;
+	while (read < write) {
+		idx = ++read % num_entries;
+		drm_printf(m, "Execlist CSB[%d]: 0x%08x, context: %d\n",
+			   idx,
+			   lower_32_bits(hws[idx]),
+			   upper_32_bits(hws[idx]));
+	}
+
+	i915_sched_lock_bh(se);
+	for (port = el->active; (rq = *port); port++) {
+		char hdr[160];
+		int len;
+
+		len = scnprintf(hdr, sizeof(hdr),
+				"Active[%d]:  ccid:%08x%s%s, ",
+				(int)(port - el->active),
+				rq->context->lrc.ccid,
+				intel_context_is_closed(rq->context) ? "!" : "",
+				intel_context_is_banned(rq->context) ? "*" : "");
+		len += print_ring(hdr + len, sizeof(hdr) - len, rq);
+		scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+		i915_request_show(m, rq, hdr, 0);
+	}
+	for (port = el->pending; (rq = *port); port++) {
+		char hdr[160];
+		int len;
+
+		len = scnprintf(hdr, sizeof(hdr),
+				"Pending[%d]: ccid:%08x%s%s, ",
+				(int)(port - el->pending),
+				rq->context->lrc.ccid,
+				intel_context_is_closed(rq->context) ? "!" : "",
+				intel_context_is_banned(rq->context) ? "*" : "");
+		len += print_ring(hdr + len, sizeof(hdr) - len, rq);
+		scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+		i915_request_show(m, rq, hdr, 0);
+	}
+	i915_sched_unlock_bh(se);
+
+	drm_printf(m, "Execlists preempt? %s, timeslice? %s\n",
+		   repr_timer(&el->preempt),
+		   repr_timer(&el->timer));
+
+	rcu_read_unlock();
+	intel_runtime_pm_put(engine->uncore->rpm, wakeref);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 792dd0bbea3b..459f727b03cd 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1827,6 +1827,9 @@ static char queue_status(const struct i915_request *rq)
 	if (i915_request_is_active(rq))
 		return 'E';
 
+	if (i915_request_on_hold(rq))
+		return 'S';
+
 	if (i915_request_is_ready(rq))
 		return intel_engine_is_virtual(rq->engine) ? 'V' : 'R';
 
@@ -1895,6 +1898,9 @@ void i915_request_show(struct drm_printer *m,
 	 *    - a completed request may still be regarded as executing, its
 	 *      status may not be updated until it is retired and removed
 	 *      from the lists
+	 *
+	 *  S [Suspended]
+	 *    - the request has been temporarily suspended from execution
 	 */
 
 	x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index c1674a84ea74..a8fb787278e6 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -1095,6 +1095,178 @@ void i915_request_show_with_schedule(struct drm_printer *m,
 	rcu_read_unlock();
 }
 
+static void hexdump(struct drm_printer *m, const void *buf, size_t len)
+{
+	const size_t rowsize = 8 * sizeof(u32);
+	const void *prev = NULL;
+	bool skip = false;
+	size_t pos;
+
+	for (pos = 0; pos < len; pos += rowsize) {
+		char line[128];
+
+		if (prev && !memcmp(prev, buf + pos, rowsize)) {
+			if (!skip) {
+				drm_printf(m, "*\n");
+				skip = true;
+			}
+			continue;
+		}
+
+		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+						rowsize, sizeof(u32),
+						line, sizeof(line),
+						false) >= sizeof(line));
+		drm_printf(m, "[%04zx] %s\n", pos, line);
+
+		prev = buf + pos;
+		skip = false;
+	}
+}
+
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
+{
+	void *ring;
+	int size;
+
+	drm_printf(m,
+		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
+		   rq->head, rq->postfix, rq->tail,
+		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
+		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
+
+	size = rq->tail - rq->head;
+	if (rq->tail < rq->head)
+		size += rq->ring->size;
+
+	ring = kmalloc(size, GFP_ATOMIC);
+	if (ring) {
+		const void *vaddr = rq->ring->vaddr;
+		unsigned int head = rq->head;
+		unsigned int len = 0;
+
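+		/* The request may wrap around the ring; copy it out in up to two spans */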
+		if (rq->tail < head) {
+			len = rq->ring->size - head;
+			memcpy(ring, vaddr + head, len);
+			head = 0;
+		}
+		memcpy(ring + len, vaddr + head, size - len);
+
+		hexdump(m, ring, size);
+		kfree(ring);
+	}
+}
+
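+/*
+ * Dump the scheduler state for debugfs and error capture, taking
+ * se->lock so that the request lists are stable while they are walked.
+ */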
+void i915_sched_show(struct drm_printer *m,
+		     struct i915_sched *se,
+		     void (*show_request)(struct drm_printer *m,
+					  const struct i915_request *rq,
+					  const char *prefix,
+					  int indent),
+		     unsigned int max)
+{
+	const struct i915_request *rq, *last;
+	unsigned long flags;
+	unsigned int count;
+	struct rb_node *rb;
+
+	rcu_read_lock();
+	spin_lock_irqsave(&se->lock, flags);
+
+	rq = i915_sched_get_active_request(se);
+	if (rq) {
+		i915_request_show(m, rq, "Active ", 0);
+
+		drm_printf(m, "\tring->start:  0x%08x\n",
+			   i915_ggtt_offset(rq->ring->vma));
+		drm_printf(m, "\tring->head:   0x%08x\n",
+			   rq->ring->head);
+		drm_printf(m, "\tring->tail:   0x%08x\n",
+			   rq->ring->tail);
+		drm_printf(m, "\tring->emit:   0x%08x\n",
+			   rq->ring->emit);
+		drm_printf(m, "\tring->space:  0x%08x\n",
+			   rq->ring->space);
+		drm_printf(m, "\tring->hwsp:   0x%08x\n",
+			   i915_request_active_timeline(rq)->hwsp_offset);
+
+		print_request_ring(m, rq);
+
+		if (rq->context->lrc_reg_state) {
+			drm_printf(m, "Logical Ring Context:\n");
+			hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
+		}
+	}
+
+	drm_printf(m, "Tasklet queued? %s (%s)\n",
+		   yesno(test_bit(TASKLET_STATE_SCHED, &se->tasklet.state)),
+		   enableddisabled(!atomic_read(&se->tasklet.count)));
+
+	drm_printf(m, "Requests:\n");
+
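+	/*
+	 * Show up to max requests from each list (executing, queued, on
+	 * hold), always printing the last entry so anything elided is
+	 * reported as "...skipping..." rather than silently dropped.
+	 */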
+	last = NULL;
+	count = 0;
+	list_for_each_entry(rq, &se->requests, sched.link) {
+		if (count++ < max - 1)
+			show_request(m, rq, "\t", 0);
+		else
+			last = rq;
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t...skipping %d executing requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t", 0);
+	}
+
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) {
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+
+		priolist_for_each_request(rq, p) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t", 0);
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t...skipping %d queued requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t", 0);
+	}
+
+	last = NULL;
+	count = 0;
+	list_for_each_entry(rq, &se->hold, sched.link) {
+		if (count++ < max - 1)
+			show_request(m, rq, "\t", 0);
+		else
+			last = rq;
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t...skipping %d suspended requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t", 0);
+	}
+
+	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
+
+	if (se->show)
+		se->show(m, se, show_request, max);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_scheduler.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1095f2e9fb1b..a12083721c84 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -155,4 +155,12 @@ void i915_request_show_with_schedule(struct drm_printer *m,
 				     const char *prefix,
 				     int indent);
 
+void i915_sched_show(struct drm_printer *m,
+		     struct i915_sched *se,
+		     void (*show_request)(struct drm_printer *m,
+					  const struct i915_request *rq,
+					  const char *prefix,
+					  int indent),
+		     unsigned int max);
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 05579f61e6bd..a8502c94d7c5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -13,6 +13,7 @@
 
 #include "i915_priolist_types.h"
 
+struct drm_printer;
 struct i915_request;
 
 /**
@@ -39,6 +40,14 @@ struct i915_sched {
 
 	struct i915_request *(*active_request)(const struct i915_sched *se);
 
+	void (*show)(struct drm_printer *m,
+		     struct i915_sched *se,
+		     void (*show_request)(struct drm_printer *m,
+					  const struct i915_request *rq,
+					  const char *prefix,
+					  int indent),
+		     unsigned int max);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

* [Intel-gfx] [CI 4/4] drm/i915: Wrap i915_request_use_semaphores()
  2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
  2021-02-07 15:03 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
  2021-02-07 15:03 ` [Intel-gfx] [CI 3/4] drm/i915: Show execlists queues when dumping state Chris Wilson
@ 2021-02-07 15:03 ` Chris Wilson
  2021-02-07 15:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-07 15:03 UTC (permalink / raw)
  To: intel-gfx

Wrap the query of whether the backend engine supports emitting
semaphores to coordinate multiple requests.
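
For illustration, a hypothetical caller (emit_coordination() is made up
for this sketch; only i915_request_use_semaphores() is part of the
patch) can now test the capability on the request itself instead of
reaching through rq->engine:

	static int emit_coordination(struct i915_request *rq)
	{
		/* fall back to software signaling without HW semaphores */
		if (!i915_request_use_semaphores(rq))
			return -ENODEV;

		/* ... emit a MI_SEMAPHORE_WAIT for the dependency ... */
		return 0;
	}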

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 2 +-
 drivers/gpu/drm/i915/i915_request.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 459f727b03cd..e7b4c4bc41a6 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1141,7 +1141,7 @@ __i915_request_await_execution(struct i915_request *to,
 	 * immediate execution, and so we must wait until it reaches the
 	 * active slot.
 	 */
-	if (intel_engine_has_semaphores(to->engine) &&
+	if (i915_request_use_semaphores(to) &&
 	    !i915_request_has_initial_breadcrumb(to)) {
 		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
 		if (err < 0)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7e722ccc9c4b..dd10a6db3d21 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -645,4 +645,9 @@ static inline bool i915_request_is_executing(const struct i915_request *rq)
 	return intel_context_inflight(rq->context);
 }
 
+static inline bool i915_request_use_semaphores(const struct i915_request *rq)
+{
+	return intel_engine_has_semaphores(rq->engine);
+}
+
 #endif /* I915_REQUEST_H */
-- 
2.20.1

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine
  2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
                   ` (2 preceding siblings ...)
  2021-02-07 15:03 ` [Intel-gfx] [CI 4/4] drm/i915: Wrap i915_request_use_semaphores() Chris Wilson
@ 2021-02-07 15:16 ` Patchwork
  2021-02-07 15:45 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2021-02-07 17:11 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2021-02-07 15:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine
URL   : https://patchwork.freedesktop.org/series/86822/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
40ff776943d9 drm/i915: Move submit_request to i915_sched_engine
90cb79f67175 drm/i915: Move finding the current active request to the scheduler
-:246: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#246: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:111:
+			     "heartbeat " RQ_FMT " completed\n",

-:265: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#265: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:143:
+				     "heartbeat " RQ_FMT " pending\n",

-:275: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#275: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:159:
+				     "bumping heartbeat " RQ_FMT " prio:%d\n",

-:283: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#283: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:167:
+				     "heartbeat " RQ_FMT " stuck\n",

-:293: WARNING:EMBEDDED_FUNCTION_NAME: Prefer using '"%s...", __func__' to using 'heartbeat', this function's name, in a string
#293: FILE: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c:201:
+	ENGINE_TRACE(engine, "heartbeat " RQ_FMT " started\n", RQ_ARG(rq));

total: 0 errors, 5 warnings, 0 checks, 579 lines checked
f89a52a6bf2f drm/i915: Show execlists queues when dumping state
-:498: WARNING:LONG_LINE: line length of 102 exceeds 100 columns
#498: FILE: drivers/gpu/drm/i915/gt/intel_execlists_submission.c:3555:
+				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),

total: 0 errors, 1 warnings, 0 checks, 839 lines checked
9da368f0928b drm/i915: Wrap i915_request_use_semaphores()


* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine
  2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
                   ` (3 preceding siblings ...)
  2021-02-07 15:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine Patchwork
@ 2021-02-07 15:45 ` Patchwork
  2021-02-07 17:11 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  5 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2021-02-07 15:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


== Series Details ==

Series: series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine
URL   : https://patchwork.freedesktop.org/series/86822/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9743 -> Patchwork_19623
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/index.html

Known issues
------------

  Here are the changes found in Patchwork_19623 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@semaphore:
    - fi-bdw-5557u:       NOTRUN -> [SKIP][1] ([fdo#109271]) +22 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/fi-bdw-5557u/igt@amdgpu/amd_basic@semaphore.html

  * igt@core_hotunplug@unbind-rebind:
    - fi-bdw-5557u:       NOTRUN -> [WARN][2] ([i915#2283])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/fi-bdw-5557u/igt@core_hotunplug@unbind-rebind.html

  * igt@debugfs_test@read_all_entries:
    - fi-tgl-y:           [PASS][3] -> [DMESG-WARN][4] ([i915#402]) +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/fi-tgl-y/igt@debugfs_test@read_all_entries.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/fi-tgl-y/igt@debugfs_test@read_all_entries.html

  
#### Possible fixes ####

  * igt@gem_tiled_blits@basic:
    - fi-tgl-y:           [DMESG-WARN][5] ([i915#402]) -> [PASS][6] +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/fi-tgl-y/igt@gem_tiled_blits@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/fi-tgl-y/igt@gem_tiled_blits@basic.html

  
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#2283]: https://gitlab.freedesktop.org/drm/intel/issues/2283
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402


Participating hosts (44 -> 38)
------------------------------

  Missing    (6): fi-jsl-1 fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9743 -> Patchwork_19623

  CI-20190529: 20190529
  CI_DRM_9743: 9781d6be4363dd76f7e42e6249a67a6af43f40b7 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5997: 2f27f54d9fae945a4b7cffc7d7370abb7a41caf2 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19623: 9da368f0928b8eed572fd1d69e0b689c43aa193c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9da368f0928b drm/i915: Wrap i915_request_use_semaphores()
f89a52a6bf2f drm/i915: Show execlists queues when dumping state
90cb79f67175 drm/i915: Move finding the current active request to the scheduler
40ff776943d9 drm/i915: Move submit_request to i915_sched_engine

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/index.html


* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine
  2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
                   ` (4 preceding siblings ...)
  2021-02-07 15:45 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2021-02-07 17:11 ` Patchwork
  5 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2021-02-07 17:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


== Series Details ==

Series: series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine
URL   : https://patchwork.freedesktop.org/series/86822/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9743_full -> Patchwork_19623_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_19623_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_19623_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_19623_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_big_fb@linear-32bpp-rotate-0:
    - shard-tglb:         [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-tglb5/igt@kms_big_fb@linear-32bpp-rotate-0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-tglb6/igt@kms_big_fb@linear-32bpp-rotate-0.html

  
Known issues
------------

  Here are the changes found in Patchwork_19623_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_persistence@close-replace-race:
    - shard-glk:          [PASS][3] -> [TIMEOUT][4] ([i915#2918])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk7/igt@gem_ctx_persistence@close-replace-race.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk4/igt@gem_ctx_persistence@close-replace-race.html

  * igt@gem_exec_fair@basic-deadline:
    - shard-glk:          [PASS][5] -> [FAIL][6] ([i915#2846])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk4/igt@gem_exec_fair@basic-deadline.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk9/igt@gem_exec_fair@basic-deadline.html

  * igt@gem_exec_fair@basic-none@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][7] ([i915#2842])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb4/igt@gem_exec_fair@basic-none@vcs1.html

  * igt@gem_exec_fair@basic-pace-solo@rcs0:
    - shard-kbl:          [PASS][8] -> [FAIL][9] ([i915#2842])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl3/igt@gem_exec_fair@basic-pace-solo@rcs0.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl2/igt@gem_exec_fair@basic-pace-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@bcs0:
    - shard-apl:          NOTRUN -> [SKIP][10] ([fdo#109271]) +47 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl8/igt@gem_exec_fair@basic-pace@bcs0.html

  * igt@gem_exec_schedule@u-fairslice@rcs0:
    - shard-glk:          [PASS][11] -> [DMESG-WARN][12] ([i915#1610] / [i915#2803])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk3/igt@gem_exec_schedule@u-fairslice@rcs0.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk3/igt@gem_exec_schedule@u-fairslice@rcs0.html

  * igt@gem_exec_schedule@u-fairslice@vcs0:
    - shard-skl:          [PASS][13] -> [DMESG-WARN][14] ([i915#1610] / [i915#2803]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl10/igt@gem_exec_schedule@u-fairslice@vcs0.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl9/igt@gem_exec_schedule@u-fairslice@vcs0.html

  * igt@gem_render_copy@x-tiled-to-vebox-yf-tiled:
    - shard-kbl:          NOTRUN -> [SKIP][15] ([fdo#109271]) +11 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl7/igt@gem_render_copy@x-tiled-to-vebox-yf-tiled.html

  * igt@gem_userptr_blits@readonly-pwrite-unsync:
    - shard-iclb:         NOTRUN -> [SKIP][16] ([fdo#110426] / [i915#1704])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@gem_userptr_blits@readonly-pwrite-unsync.html

  * igt@gen9_exec_parse@batch-zero-length:
    - shard-iclb:         NOTRUN -> [SKIP][17] ([fdo#112306]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@gen9_exec_parse@batch-zero-length.html

  * igt@i915_module_load@reload-with-fault-injection:
    - shard-skl:          [PASS][18] -> [DMESG-WARN][19] ([i915#1982])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl3/igt@i915_module_load@reload-with-fault-injection.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl2/igt@i915_module_load@reload-with-fault-injection.html

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-apl:          [PASS][20] -> [DMESG-WARN][21] ([i915#180]) +4 similar issues
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl1/igt@i915_suspend@fence-restore-tiled2untiled.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl4/igt@i915_suspend@fence-restore-tiled2untiled.html

  * igt@kms_chamelium@dp-crc-multiple:
    - shard-hsw:          NOTRUN -> [SKIP][22] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-hsw2/igt@kms_chamelium@dp-crc-multiple.html

  * igt@kms_chamelium@hdmi-hpd-for-each-pipe:
    - shard-kbl:          NOTRUN -> [SKIP][23] ([fdo#109271] / [fdo#111827]) +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl7/igt@kms_chamelium@hdmi-hpd-for-each-pipe.html

  * igt@kms_chamelium@hdmi-mode-timings:
    - shard-iclb:         NOTRUN -> [SKIP][24] ([fdo#109284] / [fdo#111827]) +3 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_chamelium@hdmi-mode-timings.html

  * igt@kms_color@pipe-b-ctm-green-to-red:
    - shard-skl:          [PASS][25] -> [FAIL][26] ([i915#129])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl5/igt@kms_color@pipe-b-ctm-green-to-red.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl9/igt@kms_color@pipe-b-ctm-green-to-red.html

  * igt@kms_color@pipe-d-ctm-0-25:
    - shard-iclb:         NOTRUN -> [SKIP][27] ([fdo#109278] / [i915#1149])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_color@pipe-d-ctm-0-25.html

  * igt@kms_color_chamelium@pipe-invalid-degamma-lut-sizes:
    - shard-apl:          NOTRUN -> [SKIP][28] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl3/igt@kms_color_chamelium@pipe-invalid-degamma-lut-sizes.html

  * igt@kms_cursor_crc@pipe-a-cursor-64x64-sliding:
    - shard-skl:          [PASS][29] -> [FAIL][30] ([i915#54]) +4 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl2/igt@kms_cursor_crc@pipe-a-cursor-64x64-sliding.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl1/igt@kms_cursor_crc@pipe-a-cursor-64x64-sliding.html

  * igt@kms_cursor_crc@pipe-c-cursor-512x512-onscreen:
    - shard-iclb:         NOTRUN -> [SKIP][31] ([fdo#109278] / [fdo#109279])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_cursor_crc@pipe-c-cursor-512x512-onscreen.html

  * igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge:
    - shard-glk:          [PASS][32] -> [DMESG-WARN][33] ([i915#118] / [i915#95])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk4/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk9/igt@kms_cursor_edge_walk@pipe-a-256x256-right-edge.html

  * igt@kms_cursor_edge_walk@pipe-d-64x64-left-edge:
    - shard-hsw:          NOTRUN -> [SKIP][34] ([fdo#109271] / [i915#533]) +4 similar issues
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-hsw2/igt@kms_cursor_edge_walk@pipe-d-64x64-left-edge.html

  * igt@kms_cursor_legacy@2x-flip-vs-cursor-atomic:
    - shard-iclb:         NOTRUN -> [SKIP][35] ([fdo#109274] / [fdo#109278])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_cursor_legacy@2x-flip-vs-cursor-atomic.html

  * igt@kms_cursor_legacy@pipe-d-torture-bo:
    - shard-apl:          NOTRUN -> [SKIP][36] ([fdo#109271] / [i915#533])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl6/igt@kms_cursor_legacy@pipe-d-torture-bo.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-apl:          [PASS][37] -> [INCOMPLETE][38] ([i915#180])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl2/igt@kms_fbcon_fbt@fbc-suspend.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl1/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@2x-modeset-vs-vblank-race-interruptible:
    - shard-iclb:         NOTRUN -> [SKIP][39] ([fdo#109274])
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_flip@2x-modeset-vs-vblank-race-interruptible.html

  * igt@kms_flip@flip-vs-expired-vblank@a-edp1:
    - shard-skl:          [PASS][40] -> [FAIL][41] ([i915#79]) +1 similar issue
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl2/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl1/igt@kms_flip@flip-vs-expired-vblank@a-edp1.html

  * igt@kms_flip@flip-vs-expired-vblank@c-hdmi-a1:
    - shard-glk:          [PASS][42] -> [FAIL][43] ([i915#2122])
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk9/igt@kms_flip@flip-vs-expired-vblank@c-hdmi-a1.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk6/igt@kms_flip@flip-vs-expired-vblank@c-hdmi-a1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-kbl:          [PASS][44] -> [DMESG-WARN][45] ([i915#180]) +4 similar issues
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl7/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl4/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_flip@plain-flip-fb-recreate@b-edp1:
    - shard-skl:          [PASS][46] -> [FAIL][47] ([i915#2122]) +1 similar issue
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl3/igt@kms_flip@plain-flip-fb-recreate@b-edp1.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl8/igt@kms_flip@plain-flip-fb-recreate@b-edp1.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-glk:          [PASS][48] -> [FAIL][49] ([i915#49])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-wc.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk9/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-mmap-wc.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-blt:
    - shard-skl:          NOTRUN -> [SKIP][50] ([fdo#109271])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl4/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-blt.html

  * igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-indfb-draw-mmap-cpu:
    - shard-hsw:          NOTRUN -> [SKIP][51] ([fdo#109271]) +40 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-hsw2/igt@kms_frontbuffer_tracking@psr-1p-offscren-pri-indfb-draw-mmap-cpu.html

  * igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-indfb-draw-render:
    - shard-iclb:         NOTRUN -> [SKIP][52] ([fdo#109280]) +5 similar issues
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_frontbuffer_tracking@psr-2p-primscrn-pri-indfb-draw-render.html

  * igt@kms_hdr@bpc-switch:
    - shard-skl:          [PASS][53] -> [FAIL][54] ([i915#1188])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl9/igt@kms_hdr@bpc-switch.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl9/igt@kms_hdr@bpc-switch.html

  * igt@kms_multipipe_modeset@basic-max-pipe-crc-check:
    - shard-iclb:         NOTRUN -> [SKIP][55] ([i915#1839])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_multipipe_modeset@basic-max-pipe-crc-check.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - shard-skl:          [PASS][56] -> [INCOMPLETE][57] ([i915#198]) +1 similar issue
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl5/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl3/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-skl:          [PASS][58] -> [INCOMPLETE][59] ([i915#648])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl1/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl8/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  * igt@kms_plane_alpha_blend@pipe-c-alpha-7efc:
    - shard-apl:          NOTRUN -> [FAIL][60] ([fdo#108145] / [i915#265])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl3/igt@kms_plane_alpha_blend@pipe-c-alpha-7efc.html

  * igt@kms_plane_alpha_blend@pipe-d-constant-alpha-min:
    - shard-iclb:         NOTRUN -> [SKIP][61] ([fdo#109278]) +5 similar issues
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_plane_alpha_blend@pipe-d-constant-alpha-min.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-3:
    - shard-iclb:         NOTRUN -> [SKIP][62] ([i915#658])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_psr2_sf@plane-move-sf-dmg-area-3.html

  * igt@kms_psr2_su@frontbuffer:
    - shard-iclb:         [PASS][63] -> [SKIP][64] ([fdo#109642] / [fdo#111068] / [i915#658])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb2/igt@kms_psr2_su@frontbuffer.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb7/igt@kms_psr2_su@frontbuffer.html

  * igt@kms_psr@primary_blt:
    - shard-hsw:          NOTRUN -> [SKIP][65] ([fdo#109271] / [i915#1072]) +1 similar issue
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-hsw2/igt@kms_psr@primary_blt.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [PASS][66] -> [SKIP][67] ([fdo#109441])
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb7/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_psr@psr2_sprite_render:
    - shard-iclb:         NOTRUN -> [SKIP][68] ([fdo#109441])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_psr@psr2_sprite_render.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-apl:          [PASS][69] -> [DMESG-WARN][70] ([i915#180] / [i915#295])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  * igt@kms_writeback@writeback-invalid-parameters:
    - shard-iclb:         NOTRUN -> [SKIP][71] ([i915#2437])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@kms_writeback@writeback-invalid-parameters.html

  * igt@nouveau_crc@pipe-b-source-outp-complete:
    - shard-iclb:         NOTRUN -> [SKIP][72] ([i915#2530])
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@nouveau_crc@pipe-b-source-outp-complete.html

  * igt@perf@polling-parameterized:
    - shard-glk:          [PASS][73] -> [FAIL][74] ([i915#1542])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk8/igt@perf@polling-parameterized.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk2/igt@perf@polling-parameterized.html

  * igt@prime_nv_pcopy@test_semaphore:
    - shard-iclb:         NOTRUN -> [SKIP][75] ([fdo#109291])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb3/igt@prime_nv_pcopy@test_semaphore.html

  
#### Possible fixes ####

  * igt@feature_discovery@psr2:
    - shard-iclb:         [SKIP][76] ([i915#658]) -> [PASS][77]
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb5/igt@feature_discovery@psr2.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb2/igt@feature_discovery@psr2.html

  * igt@gem_exec_fair@basic-none@vcs0:
    - shard-glk:          [FAIL][78] ([i915#2842]) -> [PASS][79] +1 similar issue
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk8/igt@gem_exec_fair@basic-none@vcs0.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk2/igt@gem_exec_fair@basic-none@vcs0.html

  * igt@gem_exec_fair@basic-none@vcs1:
    - shard-kbl:          [FAIL][80] ([i915#2842]) -> [PASS][81]
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl7/igt@gem_exec_fair@basic-none@vcs1.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl6/igt@gem_exec_fair@basic-none@vcs1.html

  * igt@gem_exec_fair@basic-throttle@rcs0:
    - shard-iclb:         [FAIL][82] ([i915#2849]) -> [PASS][83]
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb2/igt@gem_exec_fair@basic-throttle@rcs0.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb7/igt@gem_exec_fair@basic-throttle@rcs0.html

  * igt@gem_exec_schedule@u-fairslice@rcs0:
    - shard-skl:          [DMESG-WARN][84] ([i915#1610] / [i915#2803]) -> [PASS][85]
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl10/igt@gem_exec_schedule@u-fairslice@rcs0.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl9/igt@gem_exec_schedule@u-fairslice@rcs0.html

  * igt@gem_exec_whisper@basic-forked:
    - shard-glk:          [DMESG-WARN][86] ([i915#118] / [i915#95]) -> [PASS][87]
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk3/igt@gem_exec_whisper@basic-forked.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk6/igt@gem_exec_whisper@basic-forked.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][88] ([i915#454]) -> [PASS][89]
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb6/igt@i915_pm_dc@dc6-psr.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb1/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_color@pipe-c-ctm-0-5:
    - shard-skl:          [DMESG-WARN][90] ([i915#1982]) -> [PASS][91] +1 similar issue
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl7/igt@kms_color@pipe-c-ctm-0-5.html
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl7/igt@kms_color@pipe-c-ctm-0-5.html

  * igt@kms_cursor_crc@pipe-b-cursor-128x128-random:
    - shard-skl:          [FAIL][92] ([i915#54]) -> [PASS][93] +6 similar issues
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl4/igt@kms_cursor_crc@pipe-b-cursor-128x128-random.html
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl8/igt@kms_cursor_crc@pipe-b-cursor-128x128-random.html

  * igt@kms_cursor_legacy@flip-vs-cursor-legacy:
    - shard-skl:          [FAIL][94] ([i915#2346]) -> [PASS][95]
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl1/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl2/igt@kms_cursor_legacy@flip-vs-cursor-legacy.html

  * igt@kms_cursor_legacy@flip-vs-cursor-varying-size:
    - shard-tglb:         [FAIL][96] ([i915#2346]) -> [PASS][97]
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-tglb3/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html
   [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-tglb3/igt@kms_cursor_legacy@flip-vs-cursor-varying-size.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1:
    - shard-tglb:         [FAIL][98] ([i915#2598]) -> [PASS][99]
   [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-tglb2/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html
   [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-tglb8/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-apl:          [DMESG-WARN][100] ([i915#180]) -> [PASS][101] +5 similar issues
   [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl8/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_flip@flip-vs-suspend@a-vga1:
    - shard-snb:          [DMESG-WARN][102] ([i915#2772] / [i915#42]) -> [PASS][103]
   [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-snb6/igt@kms_flip@flip-vs-suspend@a-vga1.html
   [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-snb4/igt@kms_flip@flip-vs-suspend@a-vga1.html

  * igt@kms_flip@plain-flip-ts-check@c-edp1:
    - shard-skl:          [FAIL][104] ([i915#2122]) -> [PASS][105]
   [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-skl5/igt@kms_flip@plain-flip-ts-check@c-edp1.html
   [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-skl9/igt@kms_flip@plain-flip-ts-check@c-edp1.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [SKIP][106] ([fdo#109441]) -> [PASS][107] +2 similar issues
   [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb5/igt@kms_psr@psr2_primary_mmap_cpu.html
   [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_vblank@pipe-b-ts-continuation-suspend:
    - shard-kbl:          [DMESG-WARN][108] ([i915#180]) -> [PASS][109]
   [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl4/igt@kms_vblank@pipe-b-ts-continuation-suspend.html
   [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl7/igt@kms_vblank@pipe-b-ts-continuation-suspend.html

  * igt@sysfs_clients@recycle:
    - shard-kbl:          [FAIL][110] ([i915#3028]) -> [PASS][111]
   [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl2/igt@sysfs_clients@recycle.html
   [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl3/igt@sysfs_clients@recycle.html

  * {igt@sysfs_clients@recycle-many}:
    - shard-apl:          [FAIL][112] ([i915#3028]) -> [PASS][113]
   [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl2/igt@sysfs_clients@recycle-many.html
   [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl1/igt@sysfs_clients@recycle-many.html
    - shard-tglb:         [FAIL][114] ([i915#3028]) -> [PASS][115]
   [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-tglb1/igt@sysfs_clients@recycle-many.html
   [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-tglb2/igt@sysfs_clients@recycle-many.html

  * igt@sysfs_clients@sema-10@vcs0:
    - shard-apl:          [SKIP][116] ([fdo#109271] / [i915#3026]) -> [PASS][117] +1 similar issue
   [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl7/igt@sysfs_clients@sema-10@vcs0.html
   [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl6/igt@sysfs_clients@sema-10@vcs0.html

  
#### Warnings ####

  * igt@i915_pm_rc6_residency@rc6-idle:
    - shard-iclb:         [FAIL][118] ([i915#2680]) -> [WARN][119] ([i915#2684])
   [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb6/igt@i915_pm_rc6_residency@rc6-idle.html
   [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb2/igt@i915_pm_rc6_residency@rc6-idle.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][120] ([fdo#109349]) -> [DMESG-WARN][121] ([i915#1226])
   [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb6/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1:
    - shard-iclb:         [SKIP][122] ([i915#658]) -> [SKIP][123] ([i915#2920]) +2 similar issues
   [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb4/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1.html
   [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb2/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-1.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area-1:
    - shard-iclb:         [SKIP][124] ([i915#2920]) -> [SKIP][125] ([i915#658]) +1 similar issue
   [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-iclb2/igt@kms_psr2_sf@plane-move-sf-dmg-area-1.html
   [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-iclb7/igt@kms_psr2_sf@plane-move-sf-dmg-area-1.html

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][126], [FAIL][127], [FAIL][128], [FAIL][129]) ([i915#1814] / [i915#2295] / [i915#2505] / [i915#3002] / [i915#602]) -> ([FAIL][130], [FAIL][131], [FAIL][132], [FAIL][133], [FAIL][134]) ([i915#2295] / [i915#3002])
   [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl6/igt@runner@aborted.html
   [127]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl6/igt@runner@aborted.html
   [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl4/igt@runner@aborted.html
   [129]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-kbl3/igt@runner@aborted.html
   [130]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl4/igt@runner@aborted.html
   [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl4/igt@runner@aborted.html
   [132]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl1/igt@runner@aborted.html
   [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl1/igt@runner@aborted.html
   [134]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-kbl3/igt@runner@aborted.html
    - shard-apl:          ([FAIL][135], [FAIL][136], [FAIL][137], [FAIL][138], [FAIL][139], [FAIL][140], [FAIL][141], [FAIL][142], [FAIL][143]) ([fdo#109271] / [i915#1610] / [i915#1814] / [i915#2292] / [i915#2295] / [i915#3002]) -> ([FAIL][144], [FAIL][145], [FAIL][146], [FAIL][147], [FAIL][148], [FAIL][149], [FAIL][150], [FAIL][151], [FAIL][152], [FAIL][153]) ([fdo#109271] / [i915#1814] / [i915#2295] / [i915#3002])
   [135]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl1/igt@runner@aborted.html
   [136]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl6/igt@runner@aborted.html
   [137]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl4/igt@runner@aborted.html
   [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl4/igt@runner@aborted.html
   [139]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl4/igt@runner@aborted.html
   [140]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl3/igt@runner@aborted.html
   [141]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl6/igt@runner@aborted.html
   [142]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl8/igt@runner@aborted.html
   [143]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-apl2/igt@runner@aborted.html
   [144]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl7/igt@runner@aborted.html
   [145]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl1/igt@runner@aborted.html
   [146]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl3/igt@runner@aborted.html
   [147]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl4/igt@runner@aborted.html
   [148]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl6/igt@runner@aborted.html
   [149]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl1/igt@runner@aborted.html
   [150]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl1/igt@runner@aborted.html
   [151]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl3/igt@runner@aborted.html
   [152]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl2/igt@runner@aborted.html
   [153]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-apl6/igt@runner@aborted.html
    - shard-glk:          ([FAIL][154], [FAIL][155], [FAIL][156]) ([i915#2295] / [i915#3002] / [k.org#202321]) -> ([FAIL][157], [FAIL][158], [FAIL][159], [FAIL][160]) ([i915#2295] / [i915#2426] / [i915#3002] / [k.org#202321])
   [154]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk6/igt@runner@aborted.html
   [155]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk7/igt@runner@aborted.html
   [156]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9743/shard-glk9/igt@runner@aborted.html
   [157]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk1/igt@runner@aborted.html
   [158]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk3/igt@runner@aborted.html
   [159]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk3/igt@runner@aborted.html
   [160]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/shard-glk7/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS or FAILURE).

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19623/index.html


* [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler
  2021-02-07 12:00 [Intel-gfx] [CI 1/4] " Chris Wilson
@ 2021-02-07 12:00 ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-07 12:00 UTC (permalink / raw)
  To: intel-gfx

Since finding the currently active request starts by walking the
scheduler lists under the scheduler lock, move the routine to the
scheduler.
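
All call sites then share one locking pattern (a minimal sketch; the
hunks below show the real sites such as is_wait() and
intel_engine_is_idle()):

	struct i915_sched *se = intel_engine_get_scheduler(engine);
	struct i915_request *rq;

	spin_lock_irq(&se->lock);
	rq = i915_sched_get_active_request(se);
	/* ... inspect rq while the scheduler lists are stable ... */
	spin_unlock_irq(&se->lock);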

v2: Wrap se->active() with i915_sched_get_active_request()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |   3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 112 +++---------------
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c  |  32 ++++-
 .../drm/i915/gt/intel_execlists_submission.c  |  17 ++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  18 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  18 ++-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   4 +-
 drivers/gpu/drm/i915/i915_request.c           |  71 +----------
 drivers/gpu/drm/i915/i915_request.h           |  16 +++
 drivers/gpu/drm/i915/i915_scheduler.c         |  24 ++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  12 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |   2 +
 12 files changed, 138 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 52bba16c62e8..c530839627bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -230,9 +230,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
 				   ktime_t *now);
 
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 void intel_engine_init_active(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3b299339fb62..f6596d454d46 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1175,31 +1175,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 	}
 }
 
-static bool ring_is_idle(struct intel_engine_cs *engine)
-{
-	bool idle = true;
-
-	if (I915_SELFTEST_ONLY(!engine->mmio_base))
-		return true;
-
-	if (!intel_engine_pm_get_if_awake(engine))
-		return true;
-
-	/* First check that no commands are left in the ring */
-	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
-	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
-		idle = false;
-
-	/* No bit for gen2, so assume the CS parser is idle */
-	if (INTEL_GEN(engine->i915) > 2 &&
-	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
-		idle = false;
-
-	intel_engine_pm_put(engine);
-
-	return idle;
-}
-
 /**
  * intel_engine_is_idle() - Report if the engine has finished process all work
  * @engine: the intel_engine_cs
@@ -1210,14 +1185,12 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_request *rq = NULL;
 
 	/* More white lies, if wedged, hw state is inconsistent */
 	if (intel_gt_is_wedged(engine->gt))
 		return true;
 
-	if (!intel_engine_pm_is_awake(engine))
-		return true;
-
 	/* Waiting to drain ELSP? */
 	synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq);
 	i915_sched_flush(se);
@@ -1226,8 +1199,16 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (!i915_sched_is_idle(se))
 		return false;
 
-	/* Ring stopped? */
-	return ring_is_idle(engine);
+	/* Execution complete? */
+	if (intel_engine_pm_get_if_awake(engine)) {
+		spin_lock_irq(&se->lock);
+		rq = i915_sched_get_active_request(se);
+		spin_unlock_irq(&se->lock);
+
+		intel_engine_pm_put(engine);
+	}
+
+	return !rq;
 }
 
 bool intel_engines_are_idle(struct intel_gt *gt)
@@ -1284,7 +1265,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(struct i915_request *rq)
+static struct intel_timeline *get_timeline(const struct i915_request *rq)
 {
 	struct intel_timeline *tl;
 
@@ -1512,7 +1493,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 {
 	void *ring;
 	int size;
@@ -1597,7 +1579,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	ktime_t dummy;
@@ -1638,8 +1620,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		struct intel_timeline *tl = get_timeline(rq);
 
@@ -1671,6 +1654,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
@@ -1719,66 +1703,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 	return ktime_add(total, start);
 }
 
-static bool match_ring(struct i915_request *rq)
-{
-	u32 ring = ENGINE_READ(rq->engine, RING_START);
-
-	return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *request, *active = NULL;
-
-	/*
-	 * We are called by the error capture, reset and to dump engine
-	 * state at random points in time. In particular, note that neither is
-	 * crucially ordered with an interrupt. After a hang, the GPU is dead
-	 * and we assume that no more writes can happen (we waited long enough
-	 * for all writes that were in transaction to be flushed) - adding an
-	 * extra delay for a recent interrupt is pointless. Hence, we do
-	 * not need an engine->irq_seqno_barrier() before the seqno reads.
-	 * At all other times, we must assume the GPU is still running, but
-	 * we only care about the snapshot of this moment.
-	 */
-	lockdep_assert_held(&se->lock);
-
-	rcu_read_lock();
-	request = execlists_active(&engine->execlists);
-	if (request) {
-		struct intel_timeline *tl = request->context->timeline;
-
-		list_for_each_entry_from_reverse(request, &tl->requests, link) {
-			if (__i915_request_is_complete(request))
-				break;
-
-			active = request;
-		}
-	}
-	rcu_read_unlock();
-	if (active)
-		return active;
-
-	list_for_each_entry(request, &se->requests, sched.link) {
-		if (__i915_request_is_complete(request))
-			continue;
-
-		if (!__i915_request_has_started(request))
-			continue;
-
-		/* More than one preemptible request may match! */
-		if (!match_ring(request))
-			continue;
-
-		active = request;
-		break;
-	}
-
-	return active;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index b6dbd1150ba9..0b062fad1837 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -78,6 +78,21 @@ static void show_heartbeat(const struct i915_request *rq,
 			  rq->sched.attr.priority);
 }
 
+static bool is_wait(struct intel_engine_cs *engine)
+{
+	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_request *rq;
+	bool wait = true;
+
+	spin_lock_irq(&se->lock);
+	rq = i915_sched_get_active_request(se);
+	if (rq)
+		wait = !i915_sw_fence_signaled(&rq->submit);
+	spin_unlock_irq(&se->lock);
+
+	return wait;
+}
+
 static void heartbeat(struct work_struct *wrk)
 {
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
@@ -92,6 +107,9 @@ static void heartbeat(struct work_struct *wrk)
 
 	rq = engine->heartbeat.systole;
 	if (rq && i915_request_completed(rq)) {
+		ENGINE_TRACE(engine,
+			     "heartbeat " RQ_FMT " completed\n",
+			     RQ_ARG(rq));
 		i915_request_put(rq);
 		engine->heartbeat.systole = NULL;
 	}
@@ -110,7 +128,7 @@ static void heartbeat(struct work_struct *wrk)
 				rq->emitted_jiffies + msecs_to_jiffies(delay)))
 			goto out;
 
-		if (!i915_sw_fence_signaled(&rq->submit)) {
+		if (!i915_sw_fence_signaled(&rq->submit) && is_wait(engine)) {
 			/*
 			 * Not yet submitted, system is stalled.
 			 *
@@ -121,6 +139,9 @@ static void heartbeat(struct work_struct *wrk)
 			 * but all other contexts, including the kernel
 			 * context are stuck waiting for the signal.
 			 */
+			ENGINE_TRACE(engine,
+				     "heartbeat " RQ_FMT " pending\n",
+				     RQ_ARG(rq));
 		} else if (rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
@@ -134,10 +155,18 @@ static void heartbeat(struct work_struct *wrk)
 			if (rq->sched.attr.priority >= attr.priority)
 				attr.priority = I915_PRIORITY_BARRIER;
 
+			ENGINE_TRACE(engine,
+				     "bumping heartbeat " RQ_FMT " prio:%d\n",
+				     RQ_ARG(rq), attr.priority);
+
 			local_bh_disable();
 			i915_request_set_priority(rq, attr.priority);
 			local_bh_enable();
 		} else {
+			ENGINE_TRACE(engine,
+				     "heartbeat " RQ_FMT " stuck\n",
+				     RQ_ARG(rq));
+
 			if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 				show_heartbeat(rq, engine);
 
@@ -169,6 +198,7 @@ static void heartbeat(struct work_struct *wrk)
 	if (IS_ERR(rq))
 		goto unlock;
 
+	ENGINE_TRACE(engine, "heartbeat " RQ_FMT " started\n", RQ_ARG(rq));
 	heartbeat_commit(rq, &attr);
 
 unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 02aa3eba4ebb..f110348f2ae2 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2376,7 +2376,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
-	GEM_BUG_ON(execlists_active(&engine->execlists));
+	GEM_BUG_ON(*engine->execlists.active);
 
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -2752,6 +2752,20 @@ static void execlists_park(struct intel_engine_cs *engine)
 	cancel_timer(&engine->execlists.preempt);
 }
 
+static struct i915_request *
+execlists_active_request(const struct i915_sched *se)
+{
+	const struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *rq;
+
+	rq = execlists_active(&engine->execlists);
+	if (rq)
+		rq = active_request(rq->context->timeline, rq);
+
+	return rq;
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2888,6 +2902,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 
+	engine->sched.active_request = execlists_active_request;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index cf3bbcbe7520..a4ab9863e921 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -324,20 +324,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
 static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *pos, *rq;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
 
-	rq = NULL;
 	spin_lock_irqsave(&se->lock, flags);
-	rcu_read_lock();
-	list_for_each_entry(pos, &se->requests, sched.link) {
-		if (!__i915_request_is_complete(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
 
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -361,6 +352,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	 * subsequent hangs.
 	 */
 
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		/*
 		 * Try to restore the logical GPU state to match the
@@ -379,9 +371,15 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 		 */
 		__i915_request_reset(rq, stalled);
 
+		ENGINE_TRACE(engine,
+			     "active (guilty:%s) request, replaying HEAD:%x\n",
+			     yesno(stalled), rq->head);
+
 		GEM_BUG_ON(rq->ring != engine->legacy.ring);
 		head = rq->head;
 	} else {
+		ENGINE_TRACE(engine, "idle reset, clearing ring, HEAD:%x\n",
+			     engine->legacy.ring->tail);
 		head = engine->legacy.ring->tail;
 	}
 	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f8c50195b330..291f5b818925 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,15 +1262,11 @@ static bool record_context(struct i915_gem_context_coredump *e,
 	struct i915_gem_context *ctx;
 	bool simulated;
 
-	rcu_read_lock();
-
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
-	if (!ctx) {
-		rcu_read_unlock();
+	if (!ctx)
 		return true;
-	}
 
 	if (I915_SELFTEST_ONLY(!ctx->client)) {
 		strcpy(e->comm, "[kernel]");
@@ -1279,8 +1275,6 @@ static bool record_context(struct i915_gem_context_coredump *e,
 		e->pid = pid_nr(i915_drm_client_pid(ctx->client));
 	}
 
-	rcu_read_unlock();
-
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
@@ -1368,12 +1362,14 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	struct intel_engine_capture_vma *vma = NULL;
 
+	rcu_read_lock();
 	ee->simulated |= record_context(&ee->context, rq);
+	rcu_read_unlock();
 	if (ee->simulated)
 		return NULL;
 
@@ -1436,19 +1432,21 @@ capture_engine(struct intel_engine_cs *engine,
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	unsigned long flags;
 
 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
 	if (!ee)
 		return NULL;
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq)
 		capture = intel_engine_coredump_add_request(ee, rq,
 							    ATOMIC_MAYFAIL);
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 	if (!capture) {
 		kfree(ee);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1764fd254df3..2d8debabfe28 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -235,7 +235,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp);
 
 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
@@ -299,7 +299,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 static inline struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c03d3cedf497..792dd0bbea3b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
-	struct i915_request * const *port, *rq;
-	bool inflight = false;
-
-	if (!i915_request_is_ready(signal))
-		return false;
-
-	/*
-	 * Even if we have unwound the request, it may still be on
-	 * the GPU (preempt-to-busy). If that request is inside an
-	 * unpreemptible critical section, it will not be removed. Some
-	 * GPU functions may even be stuck waiting for the paired request
-	 * (__await_execution) to be submitted and cannot be preempted
-	 * until the bond is executing.
-	 *
-	 * As we know that there are always preemption points between
-	 * requests, we know that only the currently executing request
-	 * may be still active even though we have cleared the flag.
-	 * However, we can't rely on our tracking of ELSP[0] to know
-	 * which request is currently active and so maybe stuck, as
-	 * the tracking maybe an event behind. Instead assume that
-	 * if the context is still inflight, then it is still active
-	 * even if the active flag has been cleared.
-	 *
-	 * To further complicate matters, if there a pending promotion, the HW
-	 * may either perform a context switch to the second inflight execlists,
-	 * or it may switch to the pending set of execlists. In the case of the
-	 * latter, it may send the ACK and we process the event copying the
-	 * pending[] over top of inflight[], _overwriting_ our *active. Since
-	 * this implies the HW is arbitrating and not struck in *active, we do
-	 * not worry about complete accuracy, but we do require no read/write
-	 * tearing of the pointer [the read of the pointer must be valid, even
-	 * as the array is being overwritten, for which we require the writes
-	 * to avoid tearing.]
-	 *
-	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
-	 * the value at *execlists->active. This is fine. The promotion implies
-	 * that we received an ACK from the HW, and so the context is not
-	 * stuck -- if we do not see ourselves in *active, the inflight status
-	 * is valid. If instead we see ourselves being copied into *active,
-	 * we are inflight and may signal the callback.
-	 */
-	if (!intel_context_inflight(signal->context))
-		return false;
-
-	rcu_read_lock();
-	for (port = __engine_active(signal->engine);
-	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
-	     port++) {
-		if (rq->context == signal->context) {
-			inflight = i915_seqno_passed(rq->fence.seqno,
-						     signal->fence.seqno);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return inflight;
-}
-
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
 	 * the completed/retired request.
 	 */
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
-		if (i915_request_is_active(signal) ||
-		    __request_in_flight(signal))
+		if (i915_request_is_executing(signal))
 			__notify_execute_cb_imm(signal);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c41582b96b46..7e722ccc9c4b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -629,4 +629,20 @@ static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 	return intel_engine_has_scheduler(rq->engine);
 }
 
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+	/* Is the request presently on the HW execution queue? */
+	if (i915_request_is_active(rq))
+		return true;
+
+	/*
+	 * However, if it is not presently on the HW execution queue, it
+	 * may have been recently removed from the queue yet still be
+	 * executing until the HW has completed a preemption. We need
+	 * to double-check with the backend, which can query the HW, to
+	 * see if the request is still executing.
+	 */
+	return intel_context_inflight(rq->context);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e8db7e614ff5..c1674a84ea74 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -112,6 +112,29 @@ static void init_ipi(struct i915_sched_ipi *ipi)
 	ipi->list = NULL;
 }
 
+static struct i915_request *
+i915_sched_default_active_request(const struct i915_sched *se)
+{
+	struct i915_request *rq, *active = NULL;
+
+	/*
+	 * We assume the simplest in-order execution queue with no preemption,
+	 * i.e. the order of se->requests matches exactly the execution order
+	 * of the HW.
+	 */
+	list_for_each_entry(rq, &se->requests, sched.link) {
+		if (__i915_request_is_complete(rq))
+			continue;
+
+		if (__i915_request_has_started(rq))
+			active = rq;
+
+		break;
+	}
+
+	return active;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -134,6 +157,7 @@ void i915_sched_init(struct i915_sched *se,
 	init_ipi(&se->ipi);
 
 	se->submit_request = i915_request_enqueue;
+	se->active_request = i915_sched_default_active_request;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1803fc37bada..1095f2e9fb1b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -138,6 +138,18 @@ static inline void i915_sched_flush(struct i915_sched *se)
 	__i915_sched_flush(se, true);
 }
 
+/* Find the currently executing request on the backend */
+static inline struct i915_request *
+i915_sched_get_active_request(const struct i915_sched *se)
+{
+	lockdep_assert_held(&se->lock);
+
+	if (se->active_request)
+		return se->active_request(se);
+
+	return NULL;
+}
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2d746af501d6..05579f61e6bd 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -37,6 +37,8 @@ struct i915_sched {
 	 */
 	void (*submit_request)(struct i915_request *rq);
 
+	struct i915_request *(*active_request)(const struct i915_sched *se);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler
  2021-02-07  2:31 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
@ 2021-02-07  2:31 ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-07  2:31 UTC (permalink / raw)
  To: intel-gfx

Since finding the currently active request starts by walking the
scheduler lists under the scheduler lock, move the routine to the
scheduler.

v2: Wrap se->active() with i915_sched_get_active_request()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
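A stand-alone sketch of the active_request() vfunc pattern these hunks
introduce, for illustration only. The structs are toy stand-ins for
i915_sched/i915_request (assumed shapes, not the kernel types), and the
default walker mirrors the in-order backend behaviour below:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct request {
	struct request *next;	/* singly linked, submission order */
	bool started;
	bool completed;
};

struct sched {
	struct request *requests;
	struct request *(*active_request)(struct sched *se);
};

/*
 * Default backend: in-order execution with no preemption, so the
 * active request is the oldest one started but not yet completed.
 */
static struct request *default_active_request(struct sched *se)
{
	struct request *rq;

	for (rq = se->requests; rq; rq = rq->next) {
		if (rq->completed)
			continue;
		return rq->started ? rq : NULL;
	}

	return NULL;
}

int main(void)
{
	struct request r2 = { NULL, false, false };
	struct request r1 = { &r2, true, false };
	struct request r0 = { &r1, true, true };
	struct sched se = { &r0, default_active_request };

	/* A backend (e.g. execlists) may override se.active_request. */
	struct request *active = se.active_request(&se);

	printf("active is r1: %s\n", active == &r1 ? "yes" : "no");
	return 0;
}
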
 drivers/gpu/drm/i915/gt/intel_engine.h        |   3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 112 +++---------------
 .../drm/i915/gt/intel_execlists_submission.c  |  16 ++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   |  15 +--
 drivers/gpu/drm/i915/i915_gpu_error.c         |  18 ++-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   4 +-
 drivers/gpu/drm/i915/i915_request.c           |  71 +----------
 drivers/gpu/drm/i915/i915_request.h           |  16 +++
 drivers/gpu/drm/i915/i915_scheduler.c         |  49 ++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |   8 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |   2 +
 11 files changed, 124 insertions(+), 190 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 52bba16c62e8..c530839627bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -230,9 +230,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
 				   ktime_t *now);
 
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 void intel_engine_init_active(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3b299339fb62..14b37b8645df 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1175,31 +1175,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 	}
 }
 
-static bool ring_is_idle(struct intel_engine_cs *engine)
-{
-	bool idle = true;
-
-	if (I915_SELFTEST_ONLY(!engine->mmio_base))
-		return true;
-
-	if (!intel_engine_pm_get_if_awake(engine))
-		return true;
-
-	/* First check that no commands are left in the ring */
-	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
-	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
-		idle = false;
-
-	/* No bit for gen2, so assume the CS parser is idle */
-	if (INTEL_GEN(engine->i915) > 2 &&
-	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
-		idle = false;
-
-	intel_engine_pm_put(engine);
-
-	return idle;
-}
-
 /**
  * intel_engine_is_idle() - Report if the engine has finished process all work
  * @engine: the intel_engine_cs
@@ -1210,14 +1185,12 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 bool intel_engine_is_idle(struct intel_engine_cs *engine)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
+	struct i915_request *rq = NULL;
 
 	/* More white lies, if wedged, hw state is inconsistent */
 	if (intel_gt_is_wedged(engine->gt))
 		return true;
 
-	if (!intel_engine_pm_is_awake(engine))
-		return true;
-
 	/* Waiting to drain ELSP? */
 	synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq);
 	i915_sched_flush(se);
@@ -1226,8 +1199,16 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (!i915_sched_is_idle(se))
 		return false;
 
-	/* Ring stopped? */
-	return ring_is_idle(engine);
+	/* Execution complete? */
+	if (intel_engine_pm_get_if_awake(engine)) {
+		spin_lock_irq(&se->lock);
+		rq = i915_sched_get_active_request(se);
+		spin_unlock_irq(&se->lock);
+
+		intel_engine_pm_put(engine);
+	}
+
+	return !rq;
 }
 
 bool intel_engines_are_idle(struct intel_gt *gt)
@@ -1284,7 +1265,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(struct i915_request *rq)
+static struct intel_timeline *get_timeline(const struct i915_request *rq)
 {
 	struct intel_timeline *tl;
 
@@ -1512,7 +1493,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 {
 	void *ring;
 	int size;
@@ -1597,7 +1579,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	ktime_t dummy;
@@ -1638,8 +1620,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		struct intel_timeline *tl = get_timeline(rq);
 
@@ -1671,6 +1654,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
@@ -1719,66 +1703,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 	return ktime_add(total, start);
 }
 
-static bool match_ring(struct i915_request *rq)
-{
-	u32 ring = ENGINE_READ(rq->engine, RING_START);
-
-	return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *request, *active = NULL;
-
-	/*
-	 * We are called by the error capture, reset and to dump engine
-	 * state at random points in time. In particular, note that neither is
-	 * crucially ordered with an interrupt. After a hang, the GPU is dead
-	 * and we assume that no more writes can happen (we waited long enough
-	 * for all writes that were in transaction to be flushed) - adding an
-	 * extra delay for a recent interrupt is pointless. Hence, we do
-	 * not need an engine->irq_seqno_barrier() before the seqno reads.
-	 * At all other times, we must assume the GPU is still running, but
-	 * we only care about the snapshot of this moment.
-	 */
-	lockdep_assert_held(&se->lock);
-
-	rcu_read_lock();
-	request = execlists_active(&engine->execlists);
-	if (request) {
-		struct intel_timeline *tl = request->context->timeline;
-
-		list_for_each_entry_from_reverse(request, &tl->requests, link) {
-			if (__i915_request_is_complete(request))
-				break;
-
-			active = request;
-		}
-	}
-	rcu_read_unlock();
-	if (active)
-		return active;
-
-	list_for_each_entry(request, &se->requests, sched.link) {
-		if (__i915_request_is_complete(request))
-			continue;
-
-		if (!__i915_request_has_started(request))
-			continue;
-
-		/* More than one preemptible request may match! */
-		if (!match_ring(request))
-			continue;
-
-		active = request;
-		break;
-	}
-
-	return active;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 02aa3eba4ebb..4fb3a51ed063 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2376,7 +2376,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
-	GEM_BUG_ON(execlists_active(&engine->execlists));
+	GEM_BUG_ON(*engine->execlists.active);
 
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -2752,6 +2752,19 @@ static void execlists_park(struct intel_engine_cs *engine)
 	cancel_timer(&engine->execlists.preempt);
 }
 
+static struct i915_request *execlists_active_request(struct i915_sched *se)
+{
+	struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *rq;
+
+	rq = execlists_active(&engine->execlists);
+	if (rq)
+		rq = active_request(rq->context->timeline, rq);
+
+	return rq;
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2888,6 +2901,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 
+	engine->sched.active_request = execlists_active_request;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index cf3bbcbe7520..4e1580854e80 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -324,20 +324,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
 static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *pos, *rq;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
 
-	rq = NULL;
 	spin_lock_irqsave(&se->lock, flags);
-	rcu_read_lock();
-	list_for_each_entry(pos, &se->requests, sched.link) {
-		if (!__i915_request_is_complete(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
 
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -361,6 +352,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	 * subsequent hangs.
 	 */
 
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		/*
 		 * Try to restore the logical GPU state to match the
@@ -382,6 +374,9 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 		GEM_BUG_ON(rq->ring != engine->legacy.ring);
 		head = rq->head;
 	} else {
+		list_for_each_entry(rq, &se->requests, sched.link)
+			i915_request_put(i915_request_mark_eio(rq));
+
 		head = engine->legacy.ring->tail;
 	}
 	engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f8c50195b330..291f5b818925 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,15 +1262,11 @@ static bool record_context(struct i915_gem_context_coredump *e,
 	struct i915_gem_context *ctx;
 	bool simulated;
 
-	rcu_read_lock();
-
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
-	if (!ctx) {
-		rcu_read_unlock();
+	if (!ctx)
 		return true;
-	}
 
 	if (I915_SELFTEST_ONLY(!ctx->client)) {
 		strcpy(e->comm, "[kernel]");
@@ -1279,8 +1275,6 @@ static bool record_context(struct i915_gem_context_coredump *e,
 		e->pid = pid_nr(i915_drm_client_pid(ctx->client));
 	}
 
-	rcu_read_unlock();
-
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
@@ -1368,12 +1362,14 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	struct intel_engine_capture_vma *vma = NULL;
 
+	rcu_read_lock();
 	ee->simulated |= record_context(&ee->context, rq);
+	rcu_read_unlock();
 	if (ee->simulated)
 		return NULL;
 
@@ -1436,19 +1432,21 @@ capture_engine(struct intel_engine_cs *engine,
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	unsigned long flags;
 
 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
 	if (!ee)
 		return NULL;
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq)
 		capture = intel_engine_coredump_add_request(ee, rq,
 							    ATOMIC_MAYFAIL);
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 	if (!capture) {
 		kfree(ee);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1764fd254df3..2d8debabfe28 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -235,7 +235,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp);
 
 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
@@ -299,7 +299,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 static inline struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c03d3cedf497..792dd0bbea3b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
-	struct i915_request * const *port, *rq;
-	bool inflight = false;
-
-	if (!i915_request_is_ready(signal))
-		return false;
-
-	/*
-	 * Even if we have unwound the request, it may still be on
-	 * the GPU (preempt-to-busy). If that request is inside an
-	 * unpreemptible critical section, it will not be removed. Some
-	 * GPU functions may even be stuck waiting for the paired request
-	 * (__await_execution) to be submitted and cannot be preempted
-	 * until the bond is executing.
-	 *
-	 * As we know that there are always preemption points between
-	 * requests, we know that only the currently executing request
-	 * may be still active even though we have cleared the flag.
-	 * However, we can't rely on our tracking of ELSP[0] to know
-	 * which request is currently active and so maybe stuck, as
-	 * the tracking maybe an event behind. Instead assume that
-	 * if the context is still inflight, then it is still active
-	 * even if the active flag has been cleared.
-	 *
-	 * To further complicate matters, if there a pending promotion, the HW
-	 * may either perform a context switch to the second inflight execlists,
-	 * or it may switch to the pending set of execlists. In the case of the
-	 * latter, it may send the ACK and we process the event copying the
-	 * pending[] over top of inflight[], _overwriting_ our *active. Since
-	 * this implies the HW is arbitrating and not struck in *active, we do
-	 * not worry about complete accuracy, but we do require no read/write
-	 * tearing of the pointer [the read of the pointer must be valid, even
-	 * as the array is being overwritten, for which we require the writes
-	 * to avoid tearing.]
-	 *
-	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
-	 * the value at *execlists->active. This is fine. The promotion implies
-	 * that we received an ACK from the HW, and so the context is not
-	 * stuck -- if we do not see ourselves in *active, the inflight status
-	 * is valid. If instead we see ourselves being copied into *active,
-	 * we are inflight and may signal the callback.
-	 */
-	if (!intel_context_inflight(signal->context))
-		return false;
-
-	rcu_read_lock();
-	for (port = __engine_active(signal->engine);
-	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
-	     port++) {
-		if (rq->context == signal->context) {
-			inflight = i915_seqno_passed(rq->fence.seqno,
-						     signal->fence.seqno);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return inflight;
-}
-
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
 	 * the completed/retired request.
 	 */
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
-		if (i915_request_is_active(signal) ||
-		    __request_in_flight(signal))
+		if (i915_request_is_executing(signal))
 			__notify_execute_cb_imm(signal);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c41582b96b46..7e722ccc9c4b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -629,4 +629,20 @@ static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 	return intel_engine_has_scheduler(rq->engine);
 }
 
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+	/* Is the request presently on the HW execution queue? */
+	if (i915_request_is_active(rq))
+		return true;
+
+	/*
+	 * However, if it is not presently on the HW execution queue, it
+	 * may have been recently removed from the queue yet still be
+	 * executing until the HW has completed a preemption. We need
+	 * to double-check with the backend, which can query the HW, to
+	 * see if the request is still executing.
+	 */
+	return intel_context_inflight(rq->context);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e8db7e614ff5..351ec6773041 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -112,6 +112,54 @@ static void init_ipi(struct i915_sched_ipi *ipi)
 	ipi->list = NULL;
 }
 
+static bool match_ring(struct i915_request *rq)
+{
+	const struct intel_engine_cs *engine = rq->engine;
+	const struct intel_ring *ring = rq->ring;
+	u32 start = ENGINE_READ(engine, RING_START);
+
+	/* After a reset, RING_START will be zero. Match the first hit. */
+	return !start || start == i915_ggtt_offset(ring->vma);
+}
+
+static struct i915_request *
+i915_sched_default_active_request(struct i915_sched *se)
+{
+	struct i915_request *rq, *active = NULL;
+
+	/*
+	 * We are called by the error capture, reset and to dump engine
+	 * state at random points in time. In particular, note that neither is
+	 * crucially ordered with an interrupt. After a hang, the GPU is dead
+	 * and we assume that no more writes can happen (we waited long enough
+	 * for all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 * At all other times, we must assume the GPU is still running, but
+	 * we only care about the snapshot of this moment.
+	 */
+	lockdep_assert_held(&se->lock);
+
+	rcu_read_lock();
+	list_for_each_entry(rq, &se->requests, sched.link) {
+		if (__i915_request_is_complete(rq))
+			continue;
+
+		if (!__i915_request_has_started(rq))
+			continue;
+
+		/* More than one preemptible request may match! */
+		if (!match_ring(rq))
+			continue;
+
+		active = rq;
+		break;
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -134,6 +182,7 @@ void i915_sched_init(struct i915_sched *se,
 	init_ipi(&se->ipi);
 
 	se->submit_request = i915_request_enqueue;
+	se->active_request = i915_sched_default_active_request;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1803fc37bada..d6a7f15b953f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -138,6 +138,14 @@ static inline void i915_sched_flush(struct i915_sched *se)
 	__i915_sched_flush(se, true);
 }
 
+/* Find the currently executing request on the backend */
+static inline struct i915_request *
+i915_sched_get_active_request(struct i915_sched *se)
+{
+	lockdep_assert_held(&se->lock);
+	return se->active_request(se);
+}
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2d746af501d6..2c2abe5f5a43 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -37,6 +37,8 @@ struct i915_sched {
 	 */
 	void (*submit_request)(struct i915_request *rq);
 
+	struct i915_request *(*active_request)(struct i915_sched *se);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler
  2021-02-06 22:31 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
@ 2021-02-06 22:31 ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-06 22:31 UTC (permalink / raw)
  To: intel-gfx

Since finding the currently active request starts by walking the
scheduler lists under the scheduler lock, move the routine to the
scheduler.

v2: Wrap se->active() with i915_sched_get_active_request()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
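For reference, a user-space model of the locking contract around
i915_sched_get_active_request(): a pthread mutex stands in for se->lock
(an irq-safe spinlock in the driver), and peek_active() stands in for a
backend's active_request hook. A simplified sketch, not driver code:

#include <pthread.h>
#include <stdio.h>

struct request {
	int seqno;
};

struct sched {
	pthread_mutex_t lock;	/* stands in for se->lock, a spinlock */
	struct request *active;
	struct request *(*active_request)(struct sched *se);
};

/* Only valid with se->lock held; the kernel asserts this via lockdep. */
static struct request *get_active_request(struct sched *se)
{
	return se->active_request(se);
}

/* Trivial backend hook: report whatever was last marked active. */
static struct request *peek_active(struct sched *se)
{
	return se->active;
}

int main(void)
{
	struct request rq = { .seqno = 1 };
	struct sched se = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.active = &rq,
		.active_request = peek_active,
	};
	struct request *active;

	pthread_mutex_lock(&se.lock);	/* spin_lock_irqsave() in the driver */
	active = get_active_request(&se);
	if (active)
		printf("would capture seqno %d\n", active->seqno);
	pthread_mutex_unlock(&se.lock);	/* spin_unlock_irqrestore() */

	return 0;
}

The getter is only meaningful while the lock is held; the capture_engine()
hunk below follows the same shape.
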
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 71 ++-----------------
 .../drm/i915/gt/intel_execlists_submission.c  | 16 ++++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 12 +---
 drivers/gpu/drm/i915/i915_gpu_error.c         | 18 +++--
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 +-
 drivers/gpu/drm/i915/i915_request.c           | 71 +------------------
 drivers/gpu/drm/i915/i915_request.h           | 16 +++++
 drivers/gpu/drm/i915/i915_scheduler.c         | 49 +++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  8 +++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  2 +
 11 files changed, 110 insertions(+), 160 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 52bba16c62e8..c530839627bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -230,9 +230,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
 				   ktime_t *now);
 
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 void intel_engine_init_active(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3b299339fb62..636a2190e535 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1284,7 +1284,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(struct i915_request *rq)
+static struct intel_timeline *get_timeline(const struct i915_request *rq)
 {
 	struct intel_timeline *tl;
 
@@ -1512,7 +1512,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 {
 	void *ring;
 	int size;
@@ -1597,7 +1598,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	ktime_t dummy;
@@ -1638,8 +1639,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		struct intel_timeline *tl = get_timeline(rq);
 
@@ -1671,6 +1673,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
@@ -1719,66 +1722,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 	return ktime_add(total, start);
 }
 
-static bool match_ring(struct i915_request *rq)
-{
-	u32 ring = ENGINE_READ(rq->engine, RING_START);
-
-	return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *request, *active = NULL;
-
-	/*
-	 * We are called by the error capture, reset and to dump engine
-	 * state at random points in time. In particular, note that neither is
-	 * crucially ordered with an interrupt. After a hang, the GPU is dead
-	 * and we assume that no more writes can happen (we waited long enough
-	 * for all writes that were in transaction to be flushed) - adding an
-	 * extra delay for a recent interrupt is pointless. Hence, we do
-	 * not need an engine->irq_seqno_barrier() before the seqno reads.
-	 * At all other times, we must assume the GPU is still running, but
-	 * we only care about the snapshot of this moment.
-	 */
-	lockdep_assert_held(&se->lock);
-
-	rcu_read_lock();
-	request = execlists_active(&engine->execlists);
-	if (request) {
-		struct intel_timeline *tl = request->context->timeline;
-
-		list_for_each_entry_from_reverse(request, &tl->requests, link) {
-			if (__i915_request_is_complete(request))
-				break;
-
-			active = request;
-		}
-	}
-	rcu_read_unlock();
-	if (active)
-		return active;
-
-	list_for_each_entry(request, &se->requests, sched.link) {
-		if (__i915_request_is_complete(request))
-			continue;
-
-		if (!__i915_request_has_started(request))
-			continue;
-
-		/* More than one preemptible request may match! */
-		if (!match_ring(request))
-			continue;
-
-		active = request;
-		break;
-	}
-
-	return active;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 02aa3eba4ebb..4fb3a51ed063 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2376,7 +2376,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
-	GEM_BUG_ON(execlists_active(&engine->execlists));
+	GEM_BUG_ON(*engine->execlists.active);
 
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -2752,6 +2752,19 @@ static void execlists_park(struct intel_engine_cs *engine)
 	cancel_timer(&engine->execlists.preempt);
 }
 
+static struct i915_request *execlists_active_request(struct i915_sched *se)
+{
+	struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *rq;
+
+	rq = execlists_active(&engine->execlists);
+	if (rq)
+		rq = active_request(rq->context->timeline, rq);
+
+	return rq;
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2888,6 +2901,7 @@ static void init_execlists(struct intel_engine_cs *engine)
 	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 
+	engine->sched.active_request = execlists_active_request;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index cf3bbcbe7520..0c332ee07211 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -324,20 +324,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
 static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *pos, *rq;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
 
-	rq = NULL;
 	spin_lock_irqsave(&se->lock, flags);
-	rcu_read_lock();
-	list_for_each_entry(pos, &se->requests, sched.link) {
-		if (!__i915_request_is_complete(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
 
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -361,6 +352,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	 * subsequent hangs.
 	 */
 
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		/*
 		 * Try to restore the logical GPU state to match the
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f8c50195b330..291f5b818925 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,15 +1262,11 @@ static bool record_context(struct i915_gem_context_coredump *e,
 	struct i915_gem_context *ctx;
 	bool simulated;
 
-	rcu_read_lock();
-
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
-	if (!ctx) {
-		rcu_read_unlock();
+	if (!ctx)
 		return true;
-	}
 
 	if (I915_SELFTEST_ONLY(!ctx->client)) {
 		strcpy(e->comm, "[kernel]");
@@ -1279,8 +1275,6 @@ static bool record_context(struct i915_gem_context_coredump *e,
 		e->pid = pid_nr(i915_drm_client_pid(ctx->client));
 	}
 
-	rcu_read_unlock();
-
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
@@ -1368,12 +1362,14 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	struct intel_engine_capture_vma *vma = NULL;
 
+	rcu_read_lock();
 	ee->simulated |= record_context(&ee->context, rq);
+	rcu_read_unlock();
 	if (ee->simulated)
 		return NULL;
 
@@ -1436,19 +1432,21 @@ capture_engine(struct intel_engine_cs *engine,
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	unsigned long flags;
 
 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
 	if (!ee)
 		return NULL;
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq)
 		capture = intel_engine_coredump_add_request(ee, rq,
 							    ATOMIC_MAYFAIL);
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 	if (!capture) {
 		kfree(ee);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1764fd254df3..2d8debabfe28 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -235,7 +235,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp);
 
 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
@@ -299,7 +299,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 static inline struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c03d3cedf497..792dd0bbea3b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
-	struct i915_request * const *port, *rq;
-	bool inflight = false;
-
-	if (!i915_request_is_ready(signal))
-		return false;
-
-	/*
-	 * Even if we have unwound the request, it may still be on
-	 * the GPU (preempt-to-busy). If that request is inside an
-	 * unpreemptible critical section, it will not be removed. Some
-	 * GPU functions may even be stuck waiting for the paired request
-	 * (__await_execution) to be submitted and cannot be preempted
-	 * until the bond is executing.
-	 *
-	 * As we know that there are always preemption points between
-	 * requests, we know that only the currently executing request
-	 * may be still active even though we have cleared the flag.
-	 * However, we can't rely on our tracking of ELSP[0] to know
-	 * which request is currently active and so maybe stuck, as
-	 * the tracking maybe an event behind. Instead assume that
-	 * if the context is still inflight, then it is still active
-	 * even if the active flag has been cleared.
-	 *
-	 * To further complicate matters, if there a pending promotion, the HW
-	 * may either perform a context switch to the second inflight execlists,
-	 * or it may switch to the pending set of execlists. In the case of the
-	 * latter, it may send the ACK and we process the event copying the
-	 * pending[] over top of inflight[], _overwriting_ our *active. Since
-	 * this implies the HW is arbitrating and not struck in *active, we do
-	 * not worry about complete accuracy, but we do require no read/write
-	 * tearing of the pointer [the read of the pointer must be valid, even
-	 * as the array is being overwritten, for which we require the writes
-	 * to avoid tearing.]
-	 *
-	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
-	 * the value at *execlists->active. This is fine. The promotion implies
-	 * that we received an ACK from the HW, and so the context is not
-	 * stuck -- if we do not see ourselves in *active, the inflight status
-	 * is valid. If instead we see ourselves being copied into *active,
-	 * we are inflight and may signal the callback.
-	 */
-	if (!intel_context_inflight(signal->context))
-		return false;
-
-	rcu_read_lock();
-	for (port = __engine_active(signal->engine);
-	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
-	     port++) {
-		if (rq->context == signal->context) {
-			inflight = i915_seqno_passed(rq->fence.seqno,
-						     signal->fence.seqno);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return inflight;
-}
-
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
 	 * the completed/retired request.
 	 */
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
-		if (i915_request_is_active(signal) ||
-		    __request_in_flight(signal))
+		if (i915_request_is_executing(signal))
 			__notify_execute_cb_imm(signal);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c41582b96b46..7e722ccc9c4b 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -629,4 +629,20 @@ static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 	return intel_engine_has_scheduler(rq->engine);
 }
 
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+	/* Is the request presently on the HW execution queue? */
+	if (i915_request_is_active(rq))
+		return true;
+
+	/*
+	 * However, if it is not presently on the HW execution queue, it
+	 * may have been recently removed from the queue yet still be
+	 * executing until the HW has completed a preemption. We need
+	 * to double-check with the backend, which can query the HW, to
+	 * see if the request is still executing.
+	 */
+	return intel_context_inflight(rq->context);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e8db7e614ff5..351ec6773041 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -112,6 +112,54 @@ static void init_ipi(struct i915_sched_ipi *ipi)
 	ipi->list = NULL;
 }
 
+static bool match_ring(struct i915_request *rq)
+{
+	const struct intel_engine_cs *engine = rq->engine;
+	const struct intel_ring *ring = rq->ring;
+	u32 start = ENGINE_READ(engine, RING_START);
+
+	/* After a reset, RING_START will be zero. Match the first hit. */
+	return !start || start == i915_ggtt_offset(ring->vma);
+}
+
+static struct i915_request *
+i915_sched_default_active_request(struct i915_sched *se)
+{
+	struct i915_request *rq, *active = NULL;
+
+	/*
+	 * We are called by the error capture, reset and to dump engine
+	 * state at random points in time. In particular, note that neither is
+	 * crucially ordered with an interrupt. After a hang, the GPU is dead
+	 * and we assume that no more writes can happen (we waited long enough
+	 * for all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 * At all other times, we must assume the GPU is still running, but
+	 * we only care about the snapshot of this moment.
+	 */
+	lockdep_assert_held(&se->lock);
+
+	rcu_read_lock();
+	list_for_each_entry(rq, &se->requests, sched.link) {
+		if (__i915_request_is_complete(rq))
+			continue;
+
+		if (!__i915_request_has_started(rq))
+			continue;
+
+		/* More than one preemptible request may match! */
+		if (!match_ring(rq))
+			continue;
+
+		active = rq;
+		break;
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -134,6 +182,7 @@ void i915_sched_init(struct i915_sched *se,
 	init_ipi(&se->ipi);
 
 	se->submit_request = i915_request_enqueue;
+	se->active_request = i915_sched_default_active_request;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1803fc37bada..d6a7f15b953f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -138,6 +138,14 @@ static inline void i915_sched_flush(struct i915_sched *se)
 	__i915_sched_flush(se, true);
 }
 
+/* Find the currently executing request on the backend */
+static inline struct i915_request *
+i915_sched_get_active_request(struct i915_sched *se)
+{
+	lockdep_assert_held(&se->lock);
+	return se->active_request(se);
+}
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2d746af501d6..2c2abe5f5a43 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -37,6 +37,8 @@ struct i915_sched {
 	 */
 	void (*submit_request)(struct i915_request *rq);
 
+	struct i915_request *(*active_request)(struct i915_sched *se);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler
  2021-02-06 16:32 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
@ 2021-02-06 16:32 ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-06 16:32 UTC (permalink / raw)
  To: intel-gfx

Since finding the currently active request starts by walking the
scheduler lists under the scheduler lock, move the routine to the
scheduler.

v2: Wrap se->active() with i915_sched_get_active_request()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
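A small model of the i915_request_is_executing() check this patch adds:
the cheap active flag is consulted first, and the backend's inflight
count covers the preempt-to-busy window. Simplified types, not the
kernel's:

#include <stdbool.h>
#include <stdio.h>

struct context {
	int inflight;	/* backend's count of this context on the HW */
};

struct request {
	bool active;	/* on the HW execution queue right now */
	struct context *ctx;
};

/*
 * Trust the cheap flag first; if it is clear, the request may still be
 * running until a preemption completes, so fall back to asking the
 * backend whether the context is inflight.
 */
static bool request_is_executing(const struct request *rq)
{
	if (rq->active)
		return true;

	return rq->ctx->inflight > 0;
}

int main(void)
{
	struct context ctx = { .inflight = 1 };
	struct request rq = { .active = false, .ctx = &ctx };

	/* Unwound by preempt-to-busy, but the HW has not switched yet. */
	printf("executing: %s\n", request_is_executing(&rq) ? "yes" : "no");
	return 0;
}
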
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 71 ++--------------
 .../drm/i915/gt/intel_execlists_submission.c  | 82 ++++++++++++++++++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 12 +--
 drivers/gpu/drm/i915/i915_gpu_error.c         | 18 ++--
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 +-
 drivers/gpu/drm/i915/i915_request.c           | 71 +---------------
 drivers/gpu/drm/i915/i915_request.h           | 22 +++++
 drivers/gpu/drm/i915/i915_scheduler.c         | 49 +++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  8 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  4 +
 11 files changed, 184 insertions(+), 160 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 52bba16c62e8..c530839627bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -230,9 +230,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
 				   ktime_t *now);
 
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 void intel_engine_init_active(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3b299339fb62..636a2190e535 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1284,7 +1284,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(struct i915_request *rq)
+static struct intel_timeline *get_timeline(const struct i915_request *rq)
 {
 	struct intel_timeline *tl;
 
@@ -1512,7 +1512,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 {
 	void *ring;
 	int size;
@@ -1597,7 +1598,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	ktime_t dummy;
@@ -1638,8 +1639,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		struct intel_timeline *tl = get_timeline(rq);
 
@@ -1671,6 +1673,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
@@ -1719,66 +1722,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 	return ktime_add(total, start);
 }
 
-static bool match_ring(struct i915_request *rq)
-{
-	u32 ring = ENGINE_READ(rq->engine, RING_START);
-
-	return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *request, *active = NULL;
-
-	/*
-	 * We are called by the error capture, reset and to dump engine
-	 * state at random points in time. In particular, note that neither is
-	 * crucially ordered with an interrupt. After a hang, the GPU is dead
-	 * and we assume that no more writes can happen (we waited long enough
-	 * for all writes that were in transaction to be flushed) - adding an
-	 * extra delay for a recent interrupt is pointless. Hence, we do
-	 * not need an engine->irq_seqno_barrier() before the seqno reads.
-	 * At all other times, we must assume the GPU is still running, but
-	 * we only care about the snapshot of this moment.
-	 */
-	lockdep_assert_held(&se->lock);
-
-	rcu_read_lock();
-	request = execlists_active(&engine->execlists);
-	if (request) {
-		struct intel_timeline *tl = request->context->timeline;
-
-		list_for_each_entry_from_reverse(request, &tl->requests, link) {
-			if (__i915_request_is_complete(request))
-				break;
-
-			active = request;
-		}
-	}
-	rcu_read_unlock();
-	if (active)
-		return active;
-
-	list_for_each_entry(request, &se->requests, sched.link) {
-		if (__i915_request_is_complete(request))
-			continue;
-
-		if (!__i915_request_has_started(request))
-			continue;
-
-		/* More than one preemptible request may match! */
-		if (!match_ring(request))
-			continue;
-
-		active = request;
-		break;
-	}
-
-	return active;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 02aa3eba4ebb..b2b9e5b889a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2376,7 +2376,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
-	GEM_BUG_ON(execlists_active(&engine->execlists));
+	GEM_BUG_ON(*engine->execlists.active);
 
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -2752,6 +2752,84 @@ static void execlists_park(struct intel_engine_cs *engine)
 	cancel_timer(&engine->execlists.preempt);
 }
 
+static struct i915_request *execlists_active_request(struct i915_sched *se)
+{
+	struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *rq;
+
+	rq = execlists_active(&engine->execlists);
+	if (rq)
+		rq = active_request(rq->context->timeline, rq);
+
+	return rq;
+}
+
+static bool execlists_is_executing(const struct i915_request *rq)
+{
+	struct i915_sched *se = i915_request_get_scheduler(rq);
+	struct intel_engine_execlists *el =
+		&container_of(se, struct intel_engine_cs, sched)->execlists;
+	struct i915_request * const *port, *p;
+	bool inflight = false;
+
+	if (!i915_request_is_ready(rq))
+		return false;
+
+	/*
+	 * Even if we have unwound the request, it may still be on
+	 * the GPU (preempt-to-busy). If that request is inside an
+	 * unpreemptible critical section, it will not be removed. Some
+	 * GPU functions may even be stuck waiting for the paired request
+	 * (__await_execution) to be submitted and cannot be preempted
+	 * until the bond is executing.
+	 *
+	 * As we know that there are always preemption points between
+	 * requests, we know that only the currently executing request
+	 * may be still active even though we have cleared the flag.
+	 * However, we can't rely on our tracking of ELSP[0] to know
+	 * which request is currently active and so may be stuck, as
+	 * the tracking may be an event behind. Instead assume that
+	 * if the context is still inflight, then it is still active
+	 * even if the active flag has been cleared.
+	 *
+	 * To further complicate matters, if there is a pending promotion, the HW
+	 * may either perform a context switch to the second inflight execlists,
+	 * or it may switch to the pending set of execlists. In the case of the
+	 * latter, it may send the ACK and we process the event copying the
+	 * pending[] over top of inflight[], _overwriting_ our *active. Since
+	 * this implies the HW is arbitrating and not stuck in *active, we do
+	 * not worry about complete accuracy, but we do require no read/write
+	 * tearing of the pointer [the read of the pointer must be valid, even
+	 * as the array is being overwritten, for which we require the writes
+	 * to avoid tearing.]
+	 *
+	 * Note that the read of *execlists->active may race with the promotion
+	 * of execlists->pending[] to execlists->inflight[], overwriting
+	 * the value at *execlists->active. This is fine. The promotion implies
+	 * that we received an ACK from the HW, and so the context is not
+	 * stuck -- if we do not see ourselves in *active, the inflight status
+	 * is valid. If instead we see ourselves being copied into *active,
+	 * we are inflight and may signal the callback.
+	 */
+	if (!intel_context_inflight(rq->context))
+		return false;
+
+	rcu_read_lock();
+	for (port = READ_ONCE(el->active);
+	     (p = READ_ONCE(*port)); /* may race with promotion of pending[] */
+	     port++) {
+		if (p->context == rq->context) {
+			inflight = i915_seqno_passed(p->fence.seqno,
+						     rq->fence.seqno);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return inflight;
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2888,6 +2966,8 @@ static void init_execlists(struct intel_engine_cs *engine)
 	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 
+	engine->sched.active_request = execlists_active_request;
+	engine->sched.is_executing = execlists_is_executing;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index cf3bbcbe7520..0c332ee07211 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -324,20 +324,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
 static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *pos, *rq;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
 
-	rq = NULL;
 	spin_lock_irqsave(&se->lock, flags);
-	rcu_read_lock();
-	list_for_each_entry(pos, &se->requests, sched.link) {
-		if (!__i915_request_is_complete(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
 
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -361,6 +352,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	 * subsequent hangs.
 	 */
 
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		/*
 		 * Try to restore the logical GPU state to match the
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f8c50195b330..291f5b818925 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,15 +1262,11 @@ static bool record_context(struct i915_gem_context_coredump *e,
 	struct i915_gem_context *ctx;
 	bool simulated;
 
-	rcu_read_lock();
-
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
-	if (!ctx) {
-		rcu_read_unlock();
+	if (!ctx)
 		return true;
-	}
 
 	if (I915_SELFTEST_ONLY(!ctx->client)) {
 		strcpy(e->comm, "[kernel]");
@@ -1279,8 +1275,6 @@ static bool record_context(struct i915_gem_context_coredump *e,
 		e->pid = pid_nr(i915_drm_client_pid(ctx->client));
 	}
 
-	rcu_read_unlock();
-
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
@@ -1368,12 +1362,14 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	struct intel_engine_capture_vma *vma = NULL;
 
+	rcu_read_lock();
 	ee->simulated |= record_context(&ee->context, rq);
+	rcu_read_unlock();
 	if (ee->simulated)
 		return NULL;
 
@@ -1436,19 +1432,21 @@ capture_engine(struct intel_engine_cs *engine,
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	unsigned long flags;
 
 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
 	if (!ee)
 		return NULL;
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq)
 		capture = intel_engine_coredump_add_request(ee, rq,
 							    ATOMIC_MAYFAIL);
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 	if (!capture) {
 		kfree(ee);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1764fd254df3..2d8debabfe28 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -235,7 +235,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp);
 
 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
@@ -299,7 +299,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 static inline struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c03d3cedf497..792dd0bbea3b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
-	struct i915_request * const *port, *rq;
-	bool inflight = false;
-
-	if (!i915_request_is_ready(signal))
-		return false;
-
-	/*
-	 * Even if we have unwound the request, it may still be on
-	 * the GPU (preempt-to-busy). If that request is inside an
-	 * unpreemptible critical section, it will not be removed. Some
-	 * GPU functions may even be stuck waiting for the paired request
-	 * (__await_execution) to be submitted and cannot be preempted
-	 * until the bond is executing.
-	 *
-	 * As we know that there are always preemption points between
-	 * requests, we know that only the currently executing request
-	 * may be still active even though we have cleared the flag.
-	 * However, we can't rely on our tracking of ELSP[0] to know
-	 * which request is currently active and so maybe stuck, as
-	 * the tracking maybe an event behind. Instead assume that
-	 * if the context is still inflight, then it is still active
-	 * even if the active flag has been cleared.
-	 *
-	 * To further complicate matters, if there a pending promotion, the HW
-	 * may either perform a context switch to the second inflight execlists,
-	 * or it may switch to the pending set of execlists. In the case of the
-	 * latter, it may send the ACK and we process the event copying the
-	 * pending[] over top of inflight[], _overwriting_ our *active. Since
-	 * this implies the HW is arbitrating and not struck in *active, we do
-	 * not worry about complete accuracy, but we do require no read/write
-	 * tearing of the pointer [the read of the pointer must be valid, even
-	 * as the array is being overwritten, for which we require the writes
-	 * to avoid tearing.]
-	 *
-	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
-	 * the value at *execlists->active. This is fine. The promotion implies
-	 * that we received an ACK from the HW, and so the context is not
-	 * stuck -- if we do not see ourselves in *active, the inflight status
-	 * is valid. If instead we see ourselves being copied into *active,
-	 * we are inflight and may signal the callback.
-	 */
-	if (!intel_context_inflight(signal->context))
-		return false;
-
-	rcu_read_lock();
-	for (port = __engine_active(signal->engine);
-	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
-	     port++) {
-		if (rq->context == signal->context) {
-			inflight = i915_seqno_passed(rq->fence.seqno,
-						     signal->fence.seqno);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return inflight;
-}
-
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
 	 * the completed/retired request.
 	 */
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
-		if (i915_request_is_active(signal) ||
-		    __request_in_flight(signal))
+		if (i915_request_is_executing(signal))
 			__notify_execute_cb_imm(signal);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c41582b96b46..10561f78dd3c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -629,4 +629,26 @@ static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 	return intel_engine_has_scheduler(rq->engine);
 }
 
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+	struct i915_sched *se;
+
+	/* Is the request presently on the HW execution queue? */
+	if (i915_request_is_active(rq))
+		return true;
+
+	se = i915_request_get_scheduler(rq);
+	if (!se->is_executing)
+		return false;
+
+	/*
+	 * However, if it is not presently on the HW execution queue, it
+	 * may have been recently removed from the queue, but is in fact
+	 * still executing until the HW has completed a preemption. We
+	 * need to double check with the backend, which can query the HW
+	 * to see whether the request is still executing.
+	 */
+	return se->is_executing(rq);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e8db7e614ff5..351ec6773041 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -112,6 +112,54 @@ static void init_ipi(struct i915_sched_ipi *ipi)
 	ipi->list = NULL;
 }
 
+static bool match_ring(struct i915_request *rq)
+{
+	const struct intel_engine_cs *engine = rq->engine;
+	const struct intel_ring *ring = rq->ring;
+	u32 start = ENGINE_READ(engine, RING_START);
+
+	/* After a reset, RING_START will be zero. Match the first hit. */
+	return !start || start == i915_ggtt_offset(ring->vma);
+}
+
+static struct i915_request *
+i915_sched_default_active_request(struct i915_sched *se)
+{
+	struct i915_request *rq, *active = NULL;
+
+	/*
+	 * We are called by the error capture, reset and to dump engine
+	 * state at random points in time. In particular, note that neither is
+	 * crucially ordered with an interrupt. After a hang, the GPU is dead
+	 * and we assume that no more writes can happen (we waited long enough
+	 * for all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 * At all other times, we must assume the GPU is still running, but
+	 * we only care about the snapshot of this moment.
+	 */
+	lockdep_assert_held(&se->lock);
+
+	rcu_read_lock();
+	list_for_each_entry(rq, &se->requests, sched.link) {
+		if (__i915_request_is_complete(rq))
+			continue;
+
+		if (!__i915_request_has_started(rq))
+			continue;
+
+		/* More than one preemptible request may match! */
+		if (!match_ring(rq))
+			continue;
+
+		active = rq;
+		break;
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -134,6 +182,7 @@ void i915_sched_init(struct i915_sched *se,
 	init_ipi(&se->ipi);
 
 	se->submit_request = i915_request_enqueue;
+	se->active_request = i915_sched_default_active_request;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1803fc37bada..d6a7f15b953f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -138,6 +138,14 @@ static inline void i915_sched_flush(struct i915_sched *se)
 	__i915_sched_flush(se, true);
 }
 
+/* Find the currently executing request on the backend */
+static inline struct i915_request *
+i915_sched_get_active_request(struct i915_sched *se)
+{
+	lockdep_assert_held(&se->lock);
+	return se->active_request(se);
+}
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2d746af501d6..b433a3a861b9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -37,6 +37,10 @@ struct i915_sched {
 	 */
 	void (*submit_request)(struct i915_request *rq);
 
+	struct i915_request *(*active_request)(struct i915_sched *se);
+
+	bool (*is_executing)(const struct i915_request *rq);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

* [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler
  2021-02-06 14:24 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
@ 2021-02-06 14:24 ` Chris Wilson
  0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2021-02-06 14:24 UTC (permalink / raw)
  To: intel-gfx

Since finding the currently active request starts by walking the
scheduler lists under the scheduler lock, move the routine to the
scheduler.

v2: Wrap se->active_request() with i915_sched_get_active_request()
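
The execution check added to i915_request.h below reduces to a
two-step test. This sketch is a condensed restatement of that helper
(renamed to request_is_executing() to avoid clashing with the real
inline), not new code:

	static inline bool request_is_executing(const struct i915_request *rq)
	{
		struct i915_sched *se;

		/* Fast path: still tracked on the HW execution queue? */
		if (i915_request_is_active(rq))
			return true;

		/*
		 * Slow path: preempt-to-busy may have unwound the request
		 * while it is still running on the GPU; only the backend
		 * can ask the HW, e.g. execlists_is_executing() scanning
		 * the ELSP ports.
		 */
		se = i915_request_get_scheduler(rq);
		return se->is_executing && se->is_executing(rq);
	}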

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h        |  3 -
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 71 ++--------------
 .../drm/i915/gt/intel_execlists_submission.c  | 82 ++++++++++++++++++-
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 12 +--
 drivers/gpu/drm/i915/i915_gpu_error.c         | 18 ++--
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 +-
 drivers/gpu/drm/i915/i915_request.c           | 71 +---------------
 drivers/gpu/drm/i915/i915_request.h           | 22 +++++
 drivers/gpu/drm/i915/i915_scheduler.c         | 47 +++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |  8 ++
 drivers/gpu/drm/i915/i915_scheduler_types.h   |  4 +
 11 files changed, 182 insertions(+), 160 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 52bba16c62e8..c530839627bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -230,9 +230,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
 				   ktime_t *now);
 
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine);
-
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 
 void intel_engine_init_active(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3b299339fb62..636a2190e535 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1284,7 +1284,7 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
-static struct intel_timeline *get_timeline(struct i915_request *rq)
+static struct intel_timeline *get_timeline(const struct i915_request *rq)
 {
 	struct intel_timeline *tl;
 
@@ -1512,7 +1512,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	}
 }
 
-static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
+static void
+print_request_ring(struct drm_printer *m, const struct i915_request *rq)
 {
 	void *ring;
 	int size;
@@ -1597,7 +1598,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 {
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	ktime_t dummy;
@@ -1638,8 +1639,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		struct intel_timeline *tl = get_timeline(rq);
 
@@ -1671,6 +1673,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	}
 	drm_printf(m, "\tOn hold?: %lu\n", list_count(&se->hold));
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 
 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
 	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
@@ -1719,66 +1722,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 	return ktime_add(total, start);
 }
 
-static bool match_ring(struct i915_request *rq)
-{
-	u32 ring = ENGINE_READ(rq->engine, RING_START);
-
-	return ring == i915_ggtt_offset(rq->ring->vma);
-}
-
-struct i915_request *
-intel_engine_find_active_request(struct intel_engine_cs *engine)
-{
-	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *request, *active = NULL;
-
-	/*
-	 * We are called by the error capture, reset and to dump engine
-	 * state at random points in time. In particular, note that neither is
-	 * crucially ordered with an interrupt. After a hang, the GPU is dead
-	 * and we assume that no more writes can happen (we waited long enough
-	 * for all writes that were in transaction to be flushed) - adding an
-	 * extra delay for a recent interrupt is pointless. Hence, we do
-	 * not need an engine->irq_seqno_barrier() before the seqno reads.
-	 * At all other times, we must assume the GPU is still running, but
-	 * we only care about the snapshot of this moment.
-	 */
-	lockdep_assert_held(&se->lock);
-
-	rcu_read_lock();
-	request = execlists_active(&engine->execlists);
-	if (request) {
-		struct intel_timeline *tl = request->context->timeline;
-
-		list_for_each_entry_from_reverse(request, &tl->requests, link) {
-			if (__i915_request_is_complete(request))
-				break;
-
-			active = request;
-		}
-	}
-	rcu_read_unlock();
-	if (active)
-		return active;
-
-	list_for_each_entry(request, &se->requests, sched.link) {
-		if (__i915_request_is_complete(request))
-			continue;
-
-		if (!__i915_request_has_started(request))
-			continue;
-
-		/* More than one preemptible request may match! */
-		if (!match_ring(request))
-			continue;
-
-		active = request;
-		break;
-	}
-
-	return active;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "mock_engine.c"
 #include "selftest_engine.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 02aa3eba4ebb..b2b9e5b889a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2376,7 +2376,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
-	GEM_BUG_ON(execlists_active(&engine->execlists));
+	GEM_BUG_ON(*engine->execlists.active);
 
 	/*
 	 * Poison residual state on resume, in case the suspend didn't!
@@ -2752,6 +2752,84 @@ static void execlists_park(struct intel_engine_cs *engine)
 	cancel_timer(&engine->execlists.preempt);
 }
 
+static struct i915_request *execlists_active_request(struct i915_sched *se)
+{
+	struct intel_engine_cs *engine =
+		container_of(se, typeof(*engine), sched);
+	struct i915_request *rq;
+
+	rq = execlists_active(&engine->execlists);
+	if (rq)
+		rq = active_request(rq->context->timeline, rq);
+
+	return rq;
+}
+
+static bool execlists_is_executing(const struct i915_request *rq)
+{
+	struct i915_sched *se = i915_request_get_scheduler(rq);
+	struct intel_engine_execlists *el =
+		&container_of(se, struct intel_engine_cs, sched)->execlists;
+	struct i915_request * const *port, *p;
+	bool inflight = false;
+
+	if (!i915_request_is_ready(rq))
+		return false;
+
+	/*
+	 * Even if we have unwound the request, it may still be on
+	 * the GPU (preempt-to-busy). If that request is inside an
+	 * unpreemptible critical section, it will not be removed. Some
+	 * GPU functions may even be stuck waiting for the paired request
+	 * (__await_execution) to be submitted and cannot be preempted
+	 * until the bond is executing.
+	 *
+	 * As we know that there are always preemption points between
+	 * requests, we know that only the currently executing request
+	 * may be still active even though we have cleared the flag.
+	 * However, we can't rely on our tracking of ELSP[0] to know
+	 * which request is currently active and so may be stuck, as
+	 * the tracking may be an event behind. Instead assume that
+	 * if the context is still inflight, then it is still active
+	 * even if the active flag has been cleared.
+	 *
+	 * To further complicate matters, if there is a pending promotion, the HW
+	 * may either perform a context switch to the second inflight execlists,
+	 * or it may switch to the pending set of execlists. In the case of the
+	 * latter, it may send the ACK and we process the event copying the
+	 * pending[] over top of inflight[], _overwriting_ our *active. Since
+	 * this implies the HW is arbitrating and not stuck in *active, we do
+	 * not worry about complete accuracy, but we do require no read/write
+	 * tearing of the pointer [the read of the pointer must be valid, even
+	 * as the array is being overwritten, for which we require the writes
+	 * to avoid tearing.]
+	 *
+	 * Note that the read of *execlists->active may race with the promotion
+	 * of execlists->pending[] to execlists->inflight[], overwriting
+	 * the value at *execlists->active. This is fine. The promotion implies
+	 * that we received an ACK from the HW, and so the context is not
+	 * stuck -- if we do not see ourselves in *active, the inflight status
+	 * is valid. If instead we see ourselves being copied into *active,
+	 * we are inflight and may signal the callback.
+	 */
+	if (!intel_context_inflight(rq->context))
+		return false;
+
+	rcu_read_lock();
+	for (port = READ_ONCE(el->active);
+	     (p = READ_ONCE(*port)); /* may race with promotion of pending[] */
+	     port++) {
+		if (p->context == rq->context) {
+			inflight = i915_seqno_passed(p->fence.seqno,
+						     rq->fence.seqno);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return inflight;
+}
+
 static bool can_preempt(struct intel_engine_cs *engine)
 {
 	if (INTEL_GEN(engine->i915) > 8)
@@ -2888,6 +2966,8 @@ static void init_execlists(struct intel_engine_cs *engine)
 	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 
+	engine->sched.active_request = execlists_active_request;
+	engine->sched.is_executing = execlists_is_executing;
 	tasklet_setup(&engine->sched.tasklet, execlists_submission_tasklet);
 
 	timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index cf3bbcbe7520..0c332ee07211 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -324,20 +324,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
 static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 {
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
-	struct i915_request *pos, *rq;
+	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
 
-	rq = NULL;
 	spin_lock_irqsave(&se->lock, flags);
-	rcu_read_lock();
-	list_for_each_entry(pos, &se->requests, sched.link) {
-		if (!__i915_request_is_complete(pos)) {
-			rq = pos;
-			break;
-		}
-	}
-	rcu_read_unlock();
 
 	/*
 	 * The guilty request will get skipped on a hung engine.
@@ -361,6 +352,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
 	 * subsequent hangs.
 	 */
 
+	rq = i915_sched_get_active_request(se);
 	if (rq) {
 		/*
 		 * Try to restore the logical GPU state to match the
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f8c50195b330..291f5b818925 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,15 +1262,11 @@ static bool record_context(struct i915_gem_context_coredump *e,
 	struct i915_gem_context *ctx;
 	bool simulated;
 
-	rcu_read_lock();
-
 	ctx = rcu_dereference(rq->context->gem_context);
 	if (ctx && !kref_get_unless_zero(&ctx->ref))
 		ctx = NULL;
-	if (!ctx) {
-		rcu_read_unlock();
+	if (!ctx)
 		return true;
-	}
 
 	if (I915_SELFTEST_ONLY(!ctx->client)) {
 		strcpy(e->comm, "[kernel]");
@@ -1279,8 +1275,6 @@ static bool record_context(struct i915_gem_context_coredump *e,
 		e->pid = pid_nr(i915_drm_client_pid(ctx->client));
 	}
 
-	rcu_read_unlock();
-
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
@@ -1368,12 +1362,14 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	struct intel_engine_capture_vma *vma = NULL;
 
+	rcu_read_lock();
 	ee->simulated |= record_context(&ee->context, rq);
+	rcu_read_unlock();
 	if (ee->simulated)
 		return NULL;
 
@@ -1436,19 +1432,21 @@ capture_engine(struct intel_engine_cs *engine,
 	struct i915_sched *se = intel_engine_get_scheduler(engine);
 	struct intel_engine_capture_vma *capture = NULL;
 	struct intel_engine_coredump *ee;
-	struct i915_request *rq;
+	const struct i915_request *rq;
 	unsigned long flags;
 
 	ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
 	if (!ee)
 		return NULL;
 
+	rcu_read_lock();
 	spin_lock_irqsave(&se->lock, flags);
-	rq = intel_engine_find_active_request(engine);
+	rq = i915_sched_get_active_request(se);
 	if (rq)
 		capture = intel_engine_coredump_add_request(ee, rq,
 							    ATOMIC_MAYFAIL);
 	spin_unlock_irqrestore(&se->lock, flags);
+	rcu_read_unlock();
 	if (!capture) {
 		kfree(ee);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1764fd254df3..2d8debabfe28 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -235,7 +235,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
 
 struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp);
 
 void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
@@ -299,7 +299,7 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
 
 static inline struct intel_engine_capture_vma *
 intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
-				  struct i915_request *rq,
+				  const struct i915_request *rq,
 				  gfp_t gfp)
 {
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c03d3cedf497..792dd0bbea3b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -349,74 +349,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
-static struct i915_request * const *
-__engine_active(struct intel_engine_cs *engine)
-{
-	return READ_ONCE(engine->execlists.active);
-}
-
-static bool __request_in_flight(const struct i915_request *signal)
-{
-	struct i915_request * const *port, *rq;
-	bool inflight = false;
-
-	if (!i915_request_is_ready(signal))
-		return false;
-
-	/*
-	 * Even if we have unwound the request, it may still be on
-	 * the GPU (preempt-to-busy). If that request is inside an
-	 * unpreemptible critical section, it will not be removed. Some
-	 * GPU functions may even be stuck waiting for the paired request
-	 * (__await_execution) to be submitted and cannot be preempted
-	 * until the bond is executing.
-	 *
-	 * As we know that there are always preemption points between
-	 * requests, we know that only the currently executing request
-	 * may be still active even though we have cleared the flag.
-	 * However, we can't rely on our tracking of ELSP[0] to know
-	 * which request is currently active and so maybe stuck, as
-	 * the tracking maybe an event behind. Instead assume that
-	 * if the context is still inflight, then it is still active
-	 * even if the active flag has been cleared.
-	 *
-	 * To further complicate matters, if there a pending promotion, the HW
-	 * may either perform a context switch to the second inflight execlists,
-	 * or it may switch to the pending set of execlists. In the case of the
-	 * latter, it may send the ACK and we process the event copying the
-	 * pending[] over top of inflight[], _overwriting_ our *active. Since
-	 * this implies the HW is arbitrating and not struck in *active, we do
-	 * not worry about complete accuracy, but we do require no read/write
-	 * tearing of the pointer [the read of the pointer must be valid, even
-	 * as the array is being overwritten, for which we require the writes
-	 * to avoid tearing.]
-	 *
-	 * Note that the read of *execlists->active may race with the promotion
-	 * of execlists->pending[] to execlists->inflight[], overwritting
-	 * the value at *execlists->active. This is fine. The promotion implies
-	 * that we received an ACK from the HW, and so the context is not
-	 * stuck -- if we do not see ourselves in *active, the inflight status
-	 * is valid. If instead we see ourselves being copied into *active,
-	 * we are inflight and may signal the callback.
-	 */
-	if (!intel_context_inflight(signal->context))
-		return false;
-
-	rcu_read_lock();
-	for (port = __engine_active(signal->engine);
-	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
-	     port++) {
-		if (rq->context == signal->context) {
-			inflight = i915_seqno_passed(rq->fence.seqno,
-						     signal->fence.seqno);
-			break;
-		}
-	}
-	rcu_read_unlock();
-
-	return inflight;
-}
-
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -460,8 +392,7 @@ __await_execution(struct i915_request *rq,
 	 * the completed/retired request.
 	 */
 	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
-		if (i915_request_is_active(signal) ||
-		    __request_in_flight(signal))
+		if (i915_request_is_executing(signal))
 			__notify_execute_cb_imm(signal);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index c41582b96b46..10561f78dd3c 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -629,4 +629,26 @@ static inline bool i915_request_use_scheduler(const struct i915_request *rq)
 	return intel_engine_has_scheduler(rq->engine);
 }
 
+static inline bool i915_request_is_executing(const struct i915_request *rq)
+{
+	struct i915_sched *se;
+
+	/* Is the request presently on the HW execution queue? */
+	if (i915_request_is_active(rq))
+		return true;
+
+	se = i915_request_get_scheduler(rq);
+	if (!se->is_executing)
+		return false;
+
+	/*
+	 * However, if it is not presently on the HW execution queue, it
+	 * may have been recently removed from the queue, but is in fact
+	 * still executing until the HW has completed a preemption. We
+	 * need to double check with the backend, which can query the HW
+	 * to see whether the request is still executing.
+	 */
+	return se->is_executing(rq);
+}
+
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e8db7e614ff5..8a50e8fcf95a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -112,6 +112,52 @@ static void init_ipi(struct i915_sched_ipi *ipi)
 	ipi->list = NULL;
 }
 
+static bool match_ring(struct i915_request *rq)
+{
+	const struct intel_engine_cs *engine = rq->engine;
+	const struct intel_ring *ring = rq->ring;
+
+	return ENGINE_READ(engine, RING_START) == i915_ggtt_offset(ring->vma);
+}
+
+static struct i915_request *
+i915_sched_default_active_request(struct i915_sched *se)
+{
+	struct i915_request *request, *active = NULL;
+
+	/*
+	 * We are called by the error capture, reset and to dump engine
+	 * state at random points in time. In particular, note that neither is
+	 * crucially ordered with an interrupt. After a hang, the GPU is dead
+	 * and we assume that no more writes can happen (we waited long enough
+	 * for all writes that were in transaction to be flushed) - adding an
+	 * extra delay for a recent interrupt is pointless. Hence, we do
+	 * not need an engine->irq_seqno_barrier() before the seqno reads.
+	 * At all other times, we must assume the GPU is still running, but
+	 * we only care about the snapshot of this moment.
+	 */
+	lockdep_assert_held(&se->lock);
+
+	rcu_read_lock();
+	list_for_each_entry(request, &se->requests, sched.link) {
+		if (__i915_request_is_complete(request))
+			continue;
+
+		if (!__i915_request_has_started(request))
+			continue;
+
+		/* More than one preemptible request may match! */
+		if (!match_ring(request))
+			continue;
+
+		active = request;
+		break;
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
 void i915_sched_init(struct i915_sched *se,
 		     struct device *dev,
 		     const char *name,
@@ -134,6 +180,7 @@ void i915_sched_init(struct i915_sched *se,
 	init_ipi(&se->ipi);
 
 	se->submit_request = i915_request_enqueue;
+	se->active_request = i915_sched_default_active_request;
 }
 
 void i915_sched_park(struct i915_sched *se)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 1803fc37bada..d6a7f15b953f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -138,6 +138,14 @@ static inline void i915_sched_flush(struct i915_sched *se)
 	__i915_sched_flush(se, true);
 }
 
+/* Find the currently executing request on the backend */
+static inline struct i915_request *
+i915_sched_get_active_request(struct i915_sched *se)
+{
+	lockdep_assert_held(&se->lock);
+	return se->active_request(se);
+}
+
 void i915_request_show_with_schedule(struct drm_printer *m,
 				     const struct i915_request *rq,
 				     const char *prefix,
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 2d746af501d6..b433a3a861b9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -37,6 +37,10 @@ struct i915_sched {
 	 */
 	void (*submit_request)(struct i915_request *rq);
 
+	struct i915_request *(*active_request)(struct i915_sched *se);
+
+	bool (*is_executing)(const struct i915_request *rq);
+
 	struct list_head requests; /* active request, on HW */
 	struct list_head hold; /* ready requests, but on hold */
 
-- 
2.20.1

end of thread, other threads:[~2021-02-07 17:11 UTC | newest]

Thread overview: 12+ messages
2021-02-07 15:03 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
2021-02-07 15:03 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
2021-02-07 15:03 ` [Intel-gfx] [CI 3/4] drm/i915: Show execlists queues when dumping state Chris Wilson
2021-02-07 15:03 ` [Intel-gfx] [CI 4/4] drm/i915: Wrap i915_request_use_semaphores() Chris Wilson
2021-02-07 15:16 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/4] drm/i915: Move submit_request to i915_sched_engine Patchwork
2021-02-07 15:45 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-02-07 17:11 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2021-02-07 12:00 [Intel-gfx] [CI 1/4] " Chris Wilson
2021-02-07 12:00 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
2021-02-07  2:31 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
2021-02-07  2:31 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
2021-02-06 22:31 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
2021-02-06 22:31 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
2021-02-06 16:32 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
2021-02-06 16:32 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
2021-02-06 14:24 [Intel-gfx] [CI 1/4] drm/i915: Move submit_request to i915_sched_engine Chris Wilson
2021-02-06 14:24 ` [Intel-gfx] [CI 2/4] drm/i915: Move finding the current active request to the scheduler Chris Wilson
