* [PATCH 01/14] drm/i915/hangcheck: Track context changes
@ 2019-05-01 11:45 Chris Wilson
  2019-05-01 11:45 ` [PATCH 02/14] drm/i915: Include fence signaled bit in print_request() Chris Wilson
                   ` (23 more replies)
  0 siblings, 24 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Given sufficient preemption, we may see a busy system that doesn't
advance the seqno while performing work across multiple contexts, and
given sufficient pathology we may not even notice a change in ACTHD. What
does change between the preempting contexts is their RING, so take note
of that and treat a change in the ring address as an indication of
forward progress.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 +
 drivers/gpu/drm/i915/gt/intel_hangcheck.c    | 12 +++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 9d64e33f8427..c0ab11b12e14 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -53,6 +53,7 @@ struct intel_instdone {
 
 struct intel_engine_hangcheck {
 	u64 acthd;
+	u32 last_ring;
 	u32 last_seqno;
 	u32 next_seqno;
 	unsigned long action_timestamp;
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index e5eaa06fe74d..721ab74a382f 100644
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -27,6 +27,7 @@
 
 struct hangcheck {
 	u64 acthd;
+	u32 ring;
 	u32 seqno;
 	enum intel_engine_hangcheck_action action;
 	unsigned long action_timestamp;
@@ -134,6 +135,7 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
 {
 	hc->acthd = intel_engine_get_active_head(engine);
 	hc->seqno = intel_engine_get_hangcheck_seqno(engine);
+	hc->ring = ENGINE_READ(engine, RING_START);
 }
 
 static void hangcheck_store_sample(struct intel_engine_cs *engine,
@@ -141,18 +143,22 @@ static void hangcheck_store_sample(struct intel_engine_cs *engine,
 {
 	engine->hangcheck.acthd = hc->acthd;
 	engine->hangcheck.last_seqno = hc->seqno;
+	engine->hangcheck.last_ring = hc->ring;
 }
 
 static enum intel_engine_hangcheck_action
 hangcheck_get_action(struct intel_engine_cs *engine,
 		     const struct hangcheck *hc)
 {
-	if (engine->hangcheck.last_seqno != hc->seqno)
-		return ENGINE_ACTIVE_SEQNO;
-
 	if (intel_engine_is_idle(engine))
 		return ENGINE_IDLE;
 
+	if (engine->hangcheck.last_ring != hc->ring)
+		return ENGINE_ACTIVE_SEQNO;
+
+	if (engine->hangcheck.last_seqno != hc->seqno)
+		return ENGINE_ACTIVE_SEQNO;
+
 	return engine_stuck(engine, hc->acthd);
 }
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 02/14] drm/i915: Include fence signaled bit in print_request()
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-02 13:43   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
                   ` (22 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Show the fence-flags view of request completion, in addition to the
normal HWSP check, and whether signaling is enabled.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 6e40f8ea9a6a..f178f1268f4e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1229,8 +1229,11 @@ static void print_request(struct drm_printer *m,
 		   i915_request_completed(rq) ? "!" :
 		   i915_request_started(rq) ? "*" :
 		   "",
+		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			    &rq->fence.flags) ?  "+" :
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-			    &rq->fence.flags) ?  "+" : "",
+			    &rq->fence.flags) ?  "-" :
+		   "",
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
 		   name);
-- 
2.20.1


* [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
  2019-05-01 11:45 ` [PATCH 02/14] drm/i915: Include fence signaled bit in print_request() Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-02 13:48   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 04/14] drm/i915: Leave engine parking to the engines Chris Wilson
                   ` (21 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Tidy up the cleanup sequence by always ensuring that the tasklet is
flushed on parking (before we clean up). Parking provides a convenient
point to ensure that the backend is truly idle.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
 drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 851e62ddcb87..7be54b868d8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 	return i915_gem_render_state_emit(rq);
 }
 
+static void execlists_park(struct intel_engine_cs *engine)
+{
+	tasklet_kill(&engine->execlists.tasklet);
+}
+
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
@@ -2342,7 +2347,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->reset.reset = execlists_reset;
 	engine->reset.finish = execlists_reset_finish;
 
-	engine->park = NULL;
+	engine->park = execlists_park;
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4c814344809c..ed94001028f2 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -1363,6 +1363,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv)
 
 static void guc_submission_park(struct intel_engine_cs *engine)
 {
+	tasklet_kill(&engine->execlists.tasklet);
 	intel_engine_unpin_breadcrumbs_irq(engine);
 	engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
-- 
2.20.1


* [PATCH 04/14] drm/i915: Leave engine parking to the engines
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
  2019-05-01 11:45 ` [PATCH 02/14] drm/i915: Include fence signaled bit in print_request() Chris Wilson
  2019-05-01 11:45 ` [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-02 14:18   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 05/14] drm/i915: Remove delay for idle_work Chris Wilson
                   ` (20 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Drop the check in GEM parking that the engines were already parked. The
intention here was that before we dropped the GT wakeref, we were sure
that no more interrupts could be raised -- however, we have already
dropped the wakeref by this point and the warning is no longer valid.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_pm.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index 3b6e8d5be8e1..49b0ce594f20 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -17,24 +17,8 @@ static void i915_gem_park(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id) {
-		/*
-		 * We are committed now to parking the engines, make sure there
-		 * will be no more interrupts arriving later and the engines
-		 * are truly idle.
-		 */
-		if (wait_for(intel_engine_is_idle(engine), 10)) {
-			struct drm_printer p = drm_debug_printer(__func__);
-
-			dev_err(i915->drm.dev,
-				"%s is not idle before parking\n",
-				engine->name);
-			intel_engine_dump(engine, &p, NULL);
-		}
-		tasklet_kill(&engine->execlists.tasklet);
-
+	for_each_engine(engine, i915, id)
 		i915_gem_batch_pool_fini(&engine->batch_pool);
-	}
 
 	i915_timelines_park(i915);
 	i915_vma_parked(i915);
-- 
2.20.1


* [PATCH 05/14] drm/i915: Remove delay for idle_work
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (2 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 04/14] drm/i915: Leave engine parking to the engines Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-02 13:19   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 06/14] drm/i915: Cancel retire_worker on parking Chris Wilson
                   ` (19 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

The original intent for the delay before running the idle_work was to
provide a hysteresis to avoid ping-ponging the device runtime-pm. Since
then we have also pulled in some memory management and general device
management for parking. But with the inversion of the wakeref handling,
GEM is no longer responsible for the wakeref, and by the time we call the
idle_work the device is already asleep. It now seems appropriate to drop
the delay and simply run the worker immediately to flush the cached GEM
state before sleeping.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  2 +-
 drivers/gpu/drm/i915/i915_drv.h               |  2 +-
 drivers/gpu/drm/i915/i915_gem_pm.c            | 21 +++++++------------
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  2 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 ++--
 5 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0e4dffcd4da4..7e8898d0b78b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3935,8 +3935,8 @@ i915_drop_caches_set(void *data, u64 val)
 	if (val & DROP_IDLE) {
 		do {
 			flush_delayed_work(&i915->gem.retire_work);
-			drain_delayed_work(&i915->gem.idle_work);
 		} while (READ_ONCE(i915->gt.awake));
+		flush_work(&i915->gem.idle_work);
 	}
 
 	if (val & DROP_FREED)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 13270e19eb87..cbf4a7d8bdae 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2035,7 +2035,7 @@ struct drm_i915_private {
 		 * arrive within a small period of time, we fire
 		 * off the idle_work.
 		 */
-		struct delayed_work idle_work;
+		struct work_struct idle_work;
 	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index 49b0ce594f20..ae91ad7cb31e 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -29,12 +29,12 @@ static void i915_gem_park(struct drm_i915_private *i915)
 static void idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.idle_work.work);
+		container_of(work, typeof(*i915), gem.idle_work);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
 	intel_wakeref_lock(&i915->gt.wakeref);
-	if (!intel_wakeref_active(&i915->gt.wakeref))
+	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
 		i915_gem_park(i915);
 	intel_wakeref_unlock(&i915->gt.wakeref);
 
@@ -74,9 +74,7 @@ static int pm_notifier(struct notifier_block *nb,
 		break;
 
 	case INTEL_GT_PARK:
-		mod_delayed_work(i915->wq,
-				 &i915->gem.idle_work,
-				 msecs_to_jiffies(100));
+		queue_work(i915->wq, &i915->gem.idle_work);
 		break;
 	}
 
@@ -142,16 +140,11 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
 	 */
-	GEM_BUG_ON(i915->gt.awake);
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
 	drain_delayed_work(&i915->gem.retire_work);
+	GEM_BUG_ON(i915->gt.awake);
+	flush_work(&i915->gem.idle_work);
 
-	/*
-	 * As the idle_work is rearming if it detects a race, play safe and
-	 * repeat the flush until it is definitely idle.
-	 */
-	drain_delayed_work(&i915->gem.idle_work);
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
 	i915_gem_drain_freed_objects(i915);
 
@@ -242,7 +235,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
 
 void i915_gem_init__pm(struct drm_i915_private *i915)
 {
-	INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
+	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
 	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
 
 	i915->gem.pm_notifier.notifier_call = pm_notifier;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 088b2aa05dcd..b926d1cd165d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -509,7 +509,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 	intel_gt_pm_get(i915);
 
 	cancel_delayed_work_sync(&i915->gem.retire_work);
-	cancel_delayed_work_sync(&i915->gem.idle_work);
+	flush_work(&i915->gem.idle_work);
 }
 
 static void restore_retire_worker(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e4033d0576c4..d919f512042c 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -59,7 +59,7 @@ static void mock_device_release(struct drm_device *dev)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	drain_delayed_work(&i915->gem.retire_work);
-	drain_delayed_work(&i915->gem.idle_work);
+	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -195,7 +195,7 @@ struct drm_i915_private *mock_gem_device(void)
 	mock_init_contexts(i915);
 
 	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
-	INIT_DELAYED_WORK(&i915->gem.idle_work, mock_idle_work_handler);
+	INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler);
 
 	i915->gt.awake = true;
 
-- 
2.20.1


* [PATCH 06/14] drm/i915: Cancel retire_worker on parking
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (3 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 05/14] drm/i915: Remove delay for idle_work Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-02 13:29   ` Tvrtko Ursulin
  2019-05-02 14:29   ` [PATCH v2] " Chris Wilson
  2019-05-01 11:45 ` [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches) Chris Wilson
                   ` (18 subsequent siblings)
  23 siblings, 2 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Replace the racy continuation check within retire_work with a definite
kill-switch on idling. The race was being exposed by gem_concurrent_blit
where the retire_worker would be terminated too early leaving us
spinning in debugfs/i915_drop_caches with nothing flushing the
retirement queue.

That the igt is trying to idle from one child while submitting
from another may be a contributing factor as to why it runs so slowly...

Testcase: igt/gem_concurrent_blit
Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
 .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index ae91ad7cb31e..b239b55f84cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.idle_work);
+	bool restart = true;
 
+	cancel_delayed_work_sync(&i915->gem.retire_work);
 	mutex_lock(&i915->drm.struct_mutex);
 
 	intel_wakeref_lock(&i915->gt.wakeref);
-	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
+	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) {
 		i915_gem_park(i915);
+		restart = false;
+	}
 	intel_wakeref_unlock(&i915->gt.wakeref);
 
 	mutex_unlock(&i915->drm.struct_mutex);
+	if (restart)
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
 }
 
 static void retire_work_handler(struct work_struct *work)
@@ -52,10 +60,9 @@ static void retire_work_handler(struct work_struct *work)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	if (intel_wakeref_active(&i915->gt.wakeref))
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
 }
 
 static int pm_notifier(struct notifier_block *nb,
@@ -140,7 +147,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
 	 */
-	drain_delayed_work(&i915->gem.retire_work);
 	GEM_BUG_ON(i915->gt.awake);
 	flush_work(&i915->gem.idle_work);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index d919f512042c..9fd02025d382 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,7 +58,6 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	drain_delayed_work(&i915->gem.retire_work);
 	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
-- 
2.20.1


* [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (4 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 06/14] drm/i915: Cancel retire_worker on parking Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-02 13:34   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
                   ` (17 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

If the user is racing a call to debugfs/i915_drop_caches with ongoing
submission from another thread/process, we may never end up idling the
GPU and be uninterruptibly spinning in debugfs/i915_drop_caches trying
to catch an idle moment.

Just flush the work once; that should be enough to park the system under
the correct conditions. Outside of those, we either have a driver bug or
the user is racing themselves. Sadly, because the user may be provoking
the unwanted situation, we can't put a warning here to attract attention
to a probable bug.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7e8898d0b78b..2ecefacb1e66 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3933,9 +3933,7 @@ i915_drop_caches_set(void *data, u64 val)
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (val & DROP_IDLE) {
-		do {
-			flush_delayed_work(&i915->gem.retire_work);
-		} while (READ_ONCE(i915->gt.awake));
+		flush_delayed_work(&i915->gem.retire_work);
 		flush_work(&i915->gem.idle_work);
 	}
 
-- 
2.20.1


* [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (5 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches) Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-03 10:53   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 09/14] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
                   ` (16 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

If we couple the scheduler more tightly with the execlists policy, we
can apply the preemption policy to the question of whether we need to
kick the tasklet at all for this priority bump.

v2: Rephrase it as a core i915 policy and not an execlists foible.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
 drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
 drivers/gpu/drm/i915/i915_request.c         |  2 --
 drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
 drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 7 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index f5b0f27cecb6..06d785533502 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
-static inline bool __execlists_need_preempt(int prio, int last)
-{
-	/*
-	 * Allow preemption of low -> normal -> high, but we do
-	 * not allow low priority tasks to preempt other low priority
-	 * tasks under the impression that latency for low priority
-	 * tasks does not matter (as much as background throughput),
-	 * so kiss.
-	 *
-	 * More naturally we would write
-	 *	prio >= max(0, last);
-	 * except that we wish to prevent triggering preemption at the same
-	 * priority level: the task that is running should remain running
-	 * to preserve FIFO ordering of dependencies.
-	 */
-	return prio > max(I915_PRIORITY_NORMAL - 1, last);
-}
-
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7be54b868d8e..35aae7b5c6b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -251,8 +251,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * ourselves, ignore the request.
 	 */
 	last_prio = effective_prio(rq);
-	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
-				      last_prio))
+	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
+					 last_prio))
 		return false;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 84538f69185b..4b042893dc0e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
-	i915_sw_fence_commit(&rq->submit);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
 
 static void dummy_request_free(struct i915_request *dummy)
 {
+	/* We have to fake the CS interrupt to kick the next request */
+	i915_sw_fence_commit(&dummy->submit);
+
 	i915_request_mark_complete(dummy);
+	dma_fence_signal(&dummy->fence);
+
 	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index af8c9fa5e066..2e22da66a56c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
-		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..88d18600f5db 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
 	return engine;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	const struct i915_request *active;
+	return rq->sched.attr.priority | __NO_PREEMPTION;
+}
+
+static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
+{
+	const struct i915_request *inflight =
+		port_request(engine->execlists.port);
 
-	if (!i915_request_is_active(rq))
+	if (!inflight)
 		return false;
 
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
+	return i915_scheduler_need_preempt(prio, rq_prio(inflight));
 }
 
 static void __i915_schedule(struct i915_request *rq,
@@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (inflight(node_to_request(node), engine))
+		if (!kick_tasklet(engine, prio))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 07d243acf553..7eefccff39bf 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p)
 		__i915_priolist_free(p);
 }
 
+static inline bool i915_scheduler_need_preempt(int prio, int active)
+{
+	/*
+	 * Allow preemption of low -> normal -> high, but we do
+	 * not allow low priority tasks to preempt other low priority
+	 * tasks under the impression that latency for low priority
+	 * tasks does not matter (as much as background throughput),
+	 * so kiss.
+	 *
+	 * More naturally we would write
+	 *	prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
+	 */
+	return prio > max(I915_PRIORITY_NORMAL - 1, active);
+}
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index ed94001028f2..cb9964ae229d 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -746,7 +746,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (__execlists_need_preempt(prio, port_prio(port))) {
+			if (i915_scheduler_need_preempt(prio,
+							port_prio(port))) {
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_PREEMPT);
 				queue_work(engine->i915->guc.preempt_wq,
-- 
2.20.1


* [PATCH 09/14] drm/i915: Delay semaphore submission until the start of the signaler
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (6 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-03 11:03   ` Tvrtko Ursulin
  2019-05-01 11:45 ` [PATCH 10/14] drm/i915/execlists: Don't apply priority boost for resets Chris Wilson
                   ` (15 subsequent siblings)
  23 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Currently we submit the semaphore busywait as soon as the signaler is
submitted to HW. However, we may submit the signaler as the tail of a
batch of requests, and even not as the first context in the HW list,
i.e. the busywait may start spinning far in advance of the signaler even
starting.

If we wait until the request before the signaler has completed before
submitting the busywait, we prevent the busywait from starting too early
when the signaler is not first in the submission port.

To handle the case where the signaler is at the start of the second (or
later) submission port, we will need to delay the execution callback
until we know the context is promoted to port0. A challenge for later.

Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2e22da66a56c..8cb3ed5531e3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -770,6 +770,21 @@ i915_request_create(struct intel_context *ce)
 	return rq;
 }
 
+static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+		return 0;
+
+	signal = list_prev_entry(signal, ring_link);
+	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+		return 0;
+
+	return i915_sw_fence_await_dma_fence(&rq->submit,
+					     &signal->fence, 0,
+					     I915_FENCE_GFP);
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -788,6 +803,10 @@ emit_semaphore_wait(struct i915_request *to,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
 	err = i915_sw_fence_await_dma_fence(&to->semaphore,
 					    &from->fence, 0,
 					    I915_FENCE_GFP);
-- 
2.20.1


* [PATCH 10/14] drm/i915/execlists: Don't apply priority boost for resets
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (7 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 09/14] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-01 11:45 ` [PATCH 11/14] drm/i915: Rearrange i915_scheduler.c Chris Wilson
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Do not treat reset as a normal preemption event and avoid giving the
guilty request a priority boost for simply being active at the time of
reset.
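The effect of the new boost parameter can be seen in a minimal sketch
(the stand-in values and names are mine, not the driver's constants):
passing boost == 0 makes the `~prio & boost` test always false, so the
unwound active request keeps its priority on reset, while a genuine
preemption event still promotes it.

```c
#define TOY_ACTIVE_PRIORITY 0x2	/* stand-in for ACTIVE_PRIORITY */

/* Sketch of the __unwind_incomplete_requests() promotion decision. */
int unwound_priority(int prio, int boost, int has_started)
{
	if ((~prio & boost) && has_started)
		prio |= boost;
	return prio;
}
```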

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 35aae7b5c6b9..aa03dd0760e9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -370,11 +370,11 @@ static void unwind_wa_tail(struct i915_request *rq)
 }
 
 static struct i915_request *
-__unwind_incomplete_requests(struct intel_engine_cs *engine)
+__unwind_incomplete_requests(struct intel_engine_cs *engine, int boost)
 {
 	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
+	int prio = I915_PRIORITY_INVALID | boost;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -418,8 +418,9 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 * in the priority queue, but they will not gain immediate access to
 	 * the GPU.
 	 */
-	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
-		prio |= ACTIVE_PRIORITY;
+	if (~prio & boost && __i915_request_has_started(active)) {
+		prio |= boost;
+		GEM_BUG_ON(active->sched.attr.priority >= prio);
 		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
@@ -434,7 +435,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
 
-	return __unwind_incomplete_requests(engine);
+	return __unwind_incomplete_requests(engine, 0);
 }
 
 static inline void
@@ -655,7 +656,8 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_cancel_port_requests(execlists);
 	__unwind_incomplete_requests(container_of(execlists,
 						  struct intel_engine_cs,
-						  execlists));
+						  execlists),
+				     ACTIVE_PRIORITY);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -1908,7 +1910,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	execlists_cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	rq = __unwind_incomplete_requests(engine);
+	rq = __unwind_incomplete_requests(engine, 0);
 	if (!rq)
 		goto out_replay;
 
-- 
2.20.1


* [PATCH 11/14] drm/i915: Rearrange i915_scheduler.c
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (8 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 10/14] drm/i915/execlists: Don't apply priority boost for resets Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-01 11:45 ` [PATCH 12/14] drm/i915: Pass i915_sched_node around internally Chris Wilson
                   ` (13 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

To avoid pulling in a forward declaration in the next patch, move the
i915_sched_node handling to after the main DFS of the scheduler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 210 +++++++++++++-------------
 1 file changed, 105 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 88d18600f5db..834b10ad4ce1 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -35,109 +35,6 @@ static inline bool node_signaled(const struct i915_sched_node *node)
 	return i915_request_completed(node_to_request(node));
 }
 
-void i915_sched_node_init(struct i915_sched_node *node)
-{
-	INIT_LIST_HEAD(&node->signalers_list);
-	INIT_LIST_HEAD(&node->waiters_list);
-	INIT_LIST_HEAD(&node->link);
-	node->attr.priority = I915_PRIORITY_INVALID;
-	node->semaphores = 0;
-	node->flags = 0;
-}
-
-static struct i915_dependency *
-i915_dependency_alloc(void)
-{
-	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct i915_dependency *dep)
-{
-	kmem_cache_free(global.slab_dependencies, dep);
-}
-
-bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
-				      struct i915_sched_node *signal,
-				      struct i915_dependency *dep,
-				      unsigned long flags)
-{
-	bool ret = false;
-
-	spin_lock_irq(&schedule_lock);
-
-	if (!node_signaled(signal)) {
-		INIT_LIST_HEAD(&dep->dfs_link);
-		list_add(&dep->wait_link, &signal->waiters_list);
-		list_add(&dep->signal_link, &node->signalers_list);
-		dep->signaler = signal;
-		dep->flags = flags;
-
-		/* Keep track of whether anyone on this chain has a semaphore */
-		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
-		    !node_started(signal))
-			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
-
-		ret = true;
-	}
-
-	spin_unlock_irq(&schedule_lock);
-
-	return ret;
-}
-
-int i915_sched_node_add_dependency(struct i915_sched_node *node,
-				   struct i915_sched_node *signal)
-{
-	struct i915_dependency *dep;
-
-	dep = i915_dependency_alloc();
-	if (!dep)
-		return -ENOMEM;
-
-	if (!__i915_sched_node_add_dependency(node, signal, dep,
-					      I915_DEPENDENCY_ALLOC))
-		i915_dependency_free(dep);
-
-	return 0;
-}
-
-void i915_sched_node_fini(struct i915_sched_node *node)
-{
-	struct i915_dependency *dep, *tmp;
-
-	GEM_BUG_ON(!list_empty(&node->link));
-
-	spin_lock_irq(&schedule_lock);
-
-	/*
-	 * Everyone we depended upon (the fences we wait to be signaled)
-	 * should retire before us and remove themselves from our list.
-	 * However, retirement is run independently on each timeline and
-	 * so we may be called out-of-order.
-	 */
-	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!node_signaled(dep->signaler));
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->wait_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(dep);
-	}
-
-	/* Remove ourselves from everyone who depends upon us */
-	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
-		GEM_BUG_ON(dep->signaler != node);
-		GEM_BUG_ON(!list_empty(&dep->dfs_link));
-
-		list_del(&dep->signal_link);
-		if (dep->flags & I915_DEPENDENCY_ALLOC)
-			i915_dependency_free(dep);
-	}
-
-	spin_unlock_irq(&schedule_lock);
-}
-
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -239,6 +136,11 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	return &p->requests[idx];
 }
 
+void __i915_priolist_free(struct i915_priolist *p)
+{
+	kmem_cache_free(global.slab_priorities, p);
+}
+
 struct sched_cache {
 	struct list_head *priolist;
 };
@@ -440,9 +342,107 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
-void __i915_priolist_free(struct i915_priolist *p)
+void i915_sched_node_init(struct i915_sched_node *node)
 {
-	kmem_cache_free(global.slab_priorities, p);
+	INIT_LIST_HEAD(&node->signalers_list);
+	INIT_LIST_HEAD(&node->waiters_list);
+	INIT_LIST_HEAD(&node->link);
+	node->attr.priority = I915_PRIORITY_INVALID;
+	node->semaphores = 0;
+	node->flags = 0;
+}
+
+static struct i915_dependency *
+i915_dependency_alloc(void)
+{
+	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
+}
+
+static void
+i915_dependency_free(struct i915_dependency *dep)
+{
+	kmem_cache_free(global.slab_dependencies, dep);
+}
+
+bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
+				      struct i915_sched_node *signal,
+				      struct i915_dependency *dep,
+				      unsigned long flags)
+{
+	bool ret = false;
+
+	spin_lock_irq(&schedule_lock);
+
+	if (!node_signaled(signal)) {
+		INIT_LIST_HEAD(&dep->dfs_link);
+		list_add(&dep->wait_link, &signal->waiters_list);
+		list_add(&dep->signal_link, &node->signalers_list);
+		dep->signaler = signal;
+		dep->flags = flags;
+
+		/* Keep track of whether anyone on this chain has a semaphore */
+		if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
+		    !node_started(signal))
+			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
+
+		ret = true;
+	}
+
+	spin_unlock_irq(&schedule_lock);
+
+	return ret;
+}
+
+int i915_sched_node_add_dependency(struct i915_sched_node *node,
+				   struct i915_sched_node *signal)
+{
+	struct i915_dependency *dep;
+
+	dep = i915_dependency_alloc();
+	if (!dep)
+		return -ENOMEM;
+
+	if (!__i915_sched_node_add_dependency(node, signal, dep,
+					      I915_DEPENDENCY_ALLOC))
+		i915_dependency_free(dep);
+
+	return 0;
+}
+
+void i915_sched_node_fini(struct i915_sched_node *node)
+{
+	struct i915_dependency *dep, *tmp;
+
+	GEM_BUG_ON(!list_empty(&node->link));
+
+	spin_lock_irq(&schedule_lock);
+
+	/*
+	 * Everyone we depended upon (the fences we wait to be signaled)
+	 * should retire before us and remove themselves from our list.
+	 * However, retirement is run independently on each timeline and
+	 * so we may be called out-of-order.
+	 */
+	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
+		GEM_BUG_ON(!node_signaled(dep->signaler));
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->wait_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(dep);
+	}
+
+	/* Remove ourselves from everyone who depends upon us */
+	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+		GEM_BUG_ON(dep->signaler != node);
+		GEM_BUG_ON(!list_empty(&dep->dfs_link));
+
+		list_del(&dep->signal_link);
+		if (dep->flags & I915_DEPENDENCY_ALLOC)
+			i915_dependency_free(dep);
+	}
+
+	spin_unlock_irq(&schedule_lock);
 }
 
 static void i915_global_scheduler_shrink(void)
-- 
2.20.1


* [PATCH 12/14] drm/i915: Pass i915_sched_node around internally
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (9 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 11/14] drm/i915: Rearrange i915_scheduler.c Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-01 11:45 ` [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter Chris Wilson
                   ` (12 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

To simplify the next patch, update bump_priority and schedule to accept
the internal i915_sched_node directly rather than a request pointer.
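The only place the request itself is still needed inside
__i915_schedule() is to find the engine, recovered from the node via
node_to_request(). That is plain container_of() arithmetic, sketched
here with toy types (mine, not the driver's):

```c
#include <stddef.h>

struct toy_sched_node {
	int priority;
};

struct toy_request {
	int seqno;
	struct toy_sched_node sched;	/* embedded, as in i915_request */
};

/* Step back from the embedded node to its containing request. */
struct toy_request *toy_node_to_request(struct toy_sched_node *node)
{
	return (struct toy_request *)((char *)node -
				      offsetof(struct toy_request, sched));
}
```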

add/remove: 0/0 grow/shrink: 2/1 up/down: 8/-15 (-7)
Function                                     old     new   delta
i915_schedule_bump_priority                  109     113      +4
i915_schedule                                 50      54      +4
__i915_schedule                              922     907     -15

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_scheduler.c | 33 +++++++++++++++------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 834b10ad4ce1..05eb50558aba 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -179,7 +179,7 @@ static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
 	return i915_scheduler_need_preempt(prio, rq_prio(inflight));
 }
 
-static void __i915_schedule(struct i915_request *rq,
+static void __i915_schedule(struct i915_sched_node *rq,
 			    const struct i915_sched_attr *attr)
 {
 	struct intel_engine_cs *engine;
@@ -193,13 +193,13 @@ static void __i915_schedule(struct i915_request *rq,
 	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
-	if (i915_request_completed(rq))
+	if (prio <= READ_ONCE(rq->attr.priority))
 		return;
 
-	if (prio <= READ_ONCE(rq->sched.attr.priority))
+	if (node_signaled(rq))
 		return;
 
-	stack.signaler = &rq->sched;
+	stack.signaler = rq;
 	list_add(&stack.dfs_link, &dfs);
 
 	/*
@@ -250,9 +250,9 @@ static void __i915_schedule(struct i915_request *rq,
 	 * execlists_submit_request()), we can set our own priority and skip
 	 * acquiring the engine locks.
 	 */
-	if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&rq->sched.link));
-		rq->sched.attr = *attr;
+	if (rq->attr.priority == I915_PRIORITY_INVALID) {
+		GEM_BUG_ON(!list_empty(&rq->link));
+		rq->attr = *attr;
 
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
@@ -261,7 +261,7 @@ static void __i915_schedule(struct i915_request *rq,
 	}
 
 	memset(&cache, 0, sizeof(cache));
-	engine = rq->engine;
+	engine = node_to_request(rq)->engine;
 	spin_lock(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
@@ -319,13 +319,20 @@ static void __i915_schedule(struct i915_request *rq,
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 {
 	spin_lock_irq(&schedule_lock);
-	__i915_schedule(rq, attr);
+	__i915_schedule(&rq->sched, attr);
 	spin_unlock_irq(&schedule_lock);
 }
 
+static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
+{
+	struct i915_sched_attr attr = node->attr;
+
+	attr.priority |= bump;
+	__i915_schedule(node, &attr);
+}
+
 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 {
-	struct i915_sched_attr attr;
 	unsigned long flags;
 
 	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
@@ -334,11 +341,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 		return;
 
 	spin_lock_irqsave(&schedule_lock, flags);
-
-	attr = rq->sched.attr;
-	attr.priority |= bump;
-	__i915_schedule(rq, &attr);
-
+	__bump_priority(&rq->sched, bump);
 	spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
-- 
2.20.1


* [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (10 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 12/14] drm/i915: Pass i915_sched_node around internally Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-01 14:59   ` [PATCH] " Chris Wilson
                     ` (2 more replies)
  2019-05-01 11:45 ` [PATCH 14/14] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
                   ` (11 subsequent siblings)
  23 siblings, 3 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

The handling of the no-preemption priority level imposes the restriction
that we need to maintain the implied ordering even though preemption is
disabled. Otherwise we may end up with an AB-BA deadlock across multiple
engines due to a real preemption event reordering the no-preemption
WAITs. To resolve this issue we currently promote all requests to WAIT
on unsubmission; however, this interferes with the timeslicing
requirement that we do not apply any implicit promotion that would
defeat the round-robin timeslice list. (If we automatically promote the
active request, it will go back to the head of the queue and not the
tail!)

So we need implicit promotion to prevent reordering around semaphores
where we are not allowed to preempt, yet we must avoid implicit
promotion on unsubmission. If we instead apply that implicit promotion
when adding the dependency, we avoid the semaphore deadlock and we also
reduce the gains made by the promotion for userspace waits. Furthermore,
by keeping the earlier dependencies at a higher level, we reduce the
search space for timeslicing without altering runtime scheduling too
badly (no dependencies at all will be assigned a higher priority for
rrul).
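A minimal sketch of the new rule (toy names and values; TOY_NO_PREEMPTION
stands in for the driver's __NO_PREEMPTION flag): the bump is applied
once, when the waiter's dependency is added, and it only ever raises the
signaler's priority.

```c
#define TOY_NO_PREEMPTION 0x1	/* stand-in for __NO_PREEMPTION */

/* Sketch of __bump_priority() as invoked from add_dependency(): the
 * signaler is promoted when a waiter attaches, never on unsubmit. */
int bump_signaler_priority(int signaler_prio)
{
	return signaler_prio | TOY_NO_PREEMPTION;
}
```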

Testcase: igt/gem_concurrent_blit
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c   | 9 ---------
 drivers/gpu/drm/i915/i915_scheduler.c | 9 +++++++++
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8cb3ed5531e3..065da1bcbb4c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -468,15 +468,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	/*
-	 * As we do not allow WAIT to preempt inflight requests,
-	 * once we have executed a request, along with triggering
-	 * any execution callbacks, we must preserve its ordering
-	 * within the non-preemptible FIFO.
-	 */
-	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
-	request->sched.attr.priority |= __NO_PREEMPTION;
-
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 05eb50558aba..ecc3e83ef28d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -388,6 +388,15 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		    !node_started(signal))
 			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 
+		/*
+		 * As we do not allow WAIT to preempt inflight requests,
+		 * once we have executed a request, along with triggering
+		 * any execution callbacks, we must preserve its ordering
+		 * within the non-preemptible FIFO.
+		 */
+		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+		__bump_priority(signal, __NO_PREEMPTION);
+
 		ret = true;
 	}
 
-- 
2.20.1


* [PATCH 14/14] drm/i915: Convert inconsistent static engine tables into an init error
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (11 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter Chris Wilson
@ 2019-05-01 11:45 ` Chris Wilson
  2019-05-01 12:29 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes Patchwork
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 11:45 UTC (permalink / raw)
  To: intel-gfx

Remove the modification of the "constant" device info by promoting an
inconsistent static intel_engines table into an initialisation error.
Now, if we add a new engine to the device_info, we must first add that
engine's information to the intel_engines table.
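The consistency check added by the patch reduces to a shift (the names
here are illustrative): after setting up the `known` engines described
by the static table, any bit of the device's engine_mask at or above bit
`known` names an engine the table forgot, and probing must fail.

```c
/* Sketch of the check: engine_mask >> known must be zero, i.e. the
 * static table must describe every engine the device info claims. */
int engine_table_consistent(unsigned int engine_mask, unsigned int known)
{
	return (engine_mask >> known) == 0;
}
```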

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 28 ++++++++---------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f178f1268f4e..0b3da6d2ef59 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -372,15 +372,14 @@ void intel_engines_cleanup(struct drm_i915_private *i915)
  */
 int intel_engines_init_mmio(struct drm_i915_private *i915)
 {
-	struct intel_device_info *device_info = mkwrite_device_info(i915);
-	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
-	unsigned int mask = 0;
 	unsigned int i;
 	int err;
 
-	WARN_ON(engine_mask == 0);
-	WARN_ON(engine_mask &
-		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
+	/* We always presume we have at least RCS available for later probing */
+	if (GEM_WARN_ON(!HAS_ENGINE(i915, RCS0))) {
+		err = -ENODEV;
+		goto cleanup;
+	}
 
 	if (i915_inject_load_failure())
 		return -ENODEV;
@@ -392,25 +391,16 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
 		err = intel_engine_setup(i915, i);
 		if (err)
 			goto cleanup;
-
-		mask |= BIT(i);
 	}
 
-	/*
-	 * Catch failures to update intel_engines table when the new engines
-	 * are added to the driver by a warning and disabling the forgotten
-	 * engines.
-	 */
-	if (WARN_ON(mask != engine_mask))
-		device_info->engine_mask = mask;
-
-	/* We always presume we have at least RCS available for later probing */
-	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
+	/* Catch failures to update intel_engines table for new engines. */
+	if (GEM_WARN_ON(INTEL_INFO(i915)->engine_mask >> i)) {
 		err = -ENODEV;
 		goto cleanup;
 	}
 
-	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
+	RUNTIME_INFO(i915)->num_engines =
+		hweight32(INTEL_INFO(i915)->engine_mask);
 
 	i915_check_and_clear_faults(i915);
 
-- 
2.20.1


* ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (12 preceding siblings ...)
  2019-05-01 11:45 ` [PATCH 14/14] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
@ 2019-05-01 12:29 ` Patchwork
  2019-05-01 12:47 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 12:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes
URL   : https://patchwork.freedesktop.org/series/60153/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/hangcheck: Track context changes
Okay!

Commit: drm/i915: Include fence signaled bit in print_request()
Okay!

Commit: drm/i915/execlists: Flush the tasklet on parking
Okay!

Commit: drm/i915: Leave engine parking to the engines
Okay!

Commit: drm/i915: Remove delay for idle_work
Okay!

Commit: drm/i915: Cancel retire_worker on parking
Okay!

Commit: drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
Okay!

Commit: drm/i915: Only reschedule the submission tasklet if preemption is possible
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)

Commit: drm/i915: Delay semaphore submission until the start of the signaler
Okay!

Commit: drm/i915/execlists: Don't apply priority boost for resets
Okay!

Commit: drm/i915: Rearrange i915_scheduler.c
Okay!

Commit: drm/i915: Pass i915_sched_node around internally
Okay!

Commit: drm/i915: Bump signaler priority on adding a waiter
Okay!

Commit: drm/i915: Convert inconsistent static engine tables into an init error
Okay!


* ✗ Fi.CI.BAT: failure for series starting with [01/14] drm/i915/hangcheck: Track context changes
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (13 preceding siblings ...)
  2019-05-01 12:29 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes Patchwork
@ 2019-05-01 12:47 ` Patchwork
  2019-05-01 15:22 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2) Patchwork
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 12:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes
URL   : https://patchwork.freedesktop.org/series/60153/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6021 -> Patchwork_12923
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12923 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12923, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60153/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12923:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-cfl-8700k:       [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-cfl-8700k/igt@i915_selftest@live_execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-cfl-8700k/igt@i915_selftest@live_execlists.html
    - fi-kbl-7567u:       [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-7567u/igt@i915_selftest@live_execlists.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-7567u/igt@i915_selftest@live_execlists.html
    - fi-whl-u:           [PASS][5] -> [INCOMPLETE][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-whl-u/igt@i915_selftest@live_execlists.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-whl-u/igt@i915_selftest@live_execlists.html
    - fi-kbl-7500u:       NOTRUN -> [INCOMPLETE][7]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-7500u/igt@i915_selftest@live_execlists.html
    - fi-kbl-8809g:       [PASS][8] -> [INCOMPLETE][9]
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-8809g/igt@i915_selftest@live_execlists.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-8809g/igt@i915_selftest@live_execlists.html
    - fi-kbl-x1275:       [PASS][10] -> [INCOMPLETE][11]
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-x1275/igt@i915_selftest@live_execlists.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-x1275/igt@i915_selftest@live_execlists.html
    - fi-skl-6600u:       [PASS][12] -> [INCOMPLETE][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-6600u/igt@i915_selftest@live_execlists.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-6600u/igt@i915_selftest@live_execlists.html
    - fi-bsw-n3050:       [PASS][14] -> [INCOMPLETE][15]
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bsw-n3050/igt@i915_selftest@live_execlists.html
    - fi-bsw-kefka:       [PASS][16] -> [INCOMPLETE][17]
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bsw-kefka/igt@i915_selftest@live_execlists.html
    - fi-skl-6700k2:      [PASS][18] -> [INCOMPLETE][19]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-6700k2/igt@i915_selftest@live_execlists.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-6700k2/igt@i915_selftest@live_execlists.html
    - fi-skl-6260u:       [PASS][20] -> [INCOMPLETE][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-6260u/igt@i915_selftest@live_execlists.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-6260u/igt@i915_selftest@live_execlists.html
    - fi-skl-6770hq:      [PASS][22] -> [INCOMPLETE][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-6770hq/igt@i915_selftest@live_execlists.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-6770hq/igt@i915_selftest@live_execlists.html
    - fi-bdw-5557u:       [PASS][24] -> [INCOMPLETE][25]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bdw-5557u/igt@i915_selftest@live_execlists.html
    - fi-kbl-r:           [PASS][26] -> [INCOMPLETE][27]
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-r/igt@i915_selftest@live_execlists.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-r/igt@i915_selftest@live_execlists.html
    - fi-skl-lmem:        [PASS][28] -> [INCOMPLETE][29]
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-lmem/igt@i915_selftest@live_execlists.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-lmem/igt@i915_selftest@live_execlists.html
    - fi-cfl-8109u:       [PASS][30] -> [INCOMPLETE][31]
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-cfl-8109u/igt@i915_selftest@live_execlists.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-cfl-8109u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - fi-bdw-gvtdvm:      NOTRUN -> [FAIL][32]
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bdw-gvtdvm/igt@runner@aborted.html
    - fi-cfl-8109u:       NOTRUN -> [FAIL][33]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-cfl-8109u/igt@runner@aborted.html
    - fi-bxt-j4205:       NOTRUN -> [FAIL][34]
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bxt-j4205/igt@runner@aborted.html
    - fi-whl-u:           NOTRUN -> [FAIL][35]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-whl-u/igt@runner@aborted.html
    - fi-icl-u3:          NOTRUN -> [FAIL][36]
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-icl-u3/igt@runner@aborted.html
    - fi-bxt-dsi:         NOTRUN -> [FAIL][37]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bxt-dsi/igt@runner@aborted.html
    - fi-bsw-n3050:       NOTRUN -> [FAIL][38]
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bsw-n3050/igt@runner@aborted.html
    - fi-kbl-x1275:       NOTRUN -> [FAIL][39]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-x1275/igt@runner@aborted.html
    - fi-bsw-kefka:       NOTRUN -> [FAIL][40]
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bsw-kefka/igt@runner@aborted.html
    - fi-cfl-8700k:       NOTRUN -> [FAIL][41]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-cfl-8700k/igt@runner@aborted.html
    - fi-kbl-8809g:       NOTRUN -> [FAIL][42]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-8809g/igt@runner@aborted.html
    - fi-kbl-r:           NOTRUN -> [FAIL][43]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-r/igt@runner@aborted.html
    - fi-bdw-5557u:       NOTRUN -> [FAIL][44]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bdw-5557u/igt@runner@aborted.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live_execlists:
    - {fi-cml-u}:         [PASS][45] -> [INCOMPLETE][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-cml-u/igt@i915_selftest@live_execlists.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-cml-u/igt@i915_selftest@live_execlists.html

  * igt@runner@aborted:
    - {fi-cml-u}:         NOTRUN -> [FAIL][47]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-cml-u/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_12923 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_execlists:
    - fi-bdw-gvtdvm:      [PASS][48] -> [INCOMPLETE][49] ([fdo#105600])
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bdw-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-skl-gvtdvm:      [PASS][50] -> [INCOMPLETE][51] ([fdo#105600])
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
    - fi-icl-u3:          [PASS][52] -> [INCOMPLETE][53] ([fdo#107713] / [fdo#109567])
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-icl-u3/igt@i915_selftest@live_execlists.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-icl-u3/igt@i915_selftest@live_execlists.html
    - fi-bxt-j4205:       [PASS][54] -> [INCOMPLETE][55] ([fdo#103927])
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-bxt-j4205/igt@i915_selftest@live_execlists.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bxt-j4205/igt@i915_selftest@live_execlists.html
    - fi-glk-dsi:         [PASS][56] -> [INCOMPLETE][57] ([fdo#103359] / [k.org#198133])
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-glk-dsi/igt@i915_selftest@live_execlists.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-glk-dsi/igt@i915_selftest@live_execlists.html
    - fi-bxt-dsi:         [PASS][58] -> [INCOMPLETE][59] ([fdo#103927])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-bxt-dsi/igt@i915_selftest@live_execlists.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-bxt-dsi/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       [PASS][60] -> [INCOMPLETE][61] ([fdo#108602] / [fdo#108744])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-b:
    - fi-glk-dsi:         [PASS][62] -> [FAIL][63] ([fdo#103191])
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-glk-dsi/igt@kms_pipe_crc_basic@read-crc-pipe-b.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-glk-dsi/igt@kms_pipe_crc_basic@read-crc-pipe-b.html

  
#### Possible fixes ####

  * igt@kms_chamelium@dp-crc-fast:
    - fi-kbl-7500u:       [DMESG-WARN][64] ([fdo#103841]) -> [PASS][65]
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-blb-e6850:       [INCOMPLETE][66] ([fdo#107718]) -> [PASS][67]
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Warnings ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-kbl-guc:         [SKIP][68] ([fdo#109271]) -> [INCOMPLETE][69] ([fdo#107807])
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@runner@aborted:
    - fi-kbl-7500u:       [FAIL][70] ([fdo#103841]) -> [FAIL][71] ([fdo#108622])
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-7500u/igt@runner@aborted.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/fi-kbl-7500u/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103841]: https://bugs.freedesktop.org/show_bug.cgi?id=103841
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#105600]: https://bugs.freedesktop.org/show_bug.cgi?id=105600
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108622]: https://bugs.freedesktop.org/show_bug.cgi?id=108622
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109567]: https://bugs.freedesktop.org/show_bug.cgi?id=109567
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (52 -> 43)
------------------------------

  Additional (1): fi-pnv-d510 
  Missing    (10): fi-kbl-soraka fi-hsw-4770r fi-ilk-m540 fi-hsw-4200u fi-icl-u2 fi-bsw-cyan fi-ctg-p8600 fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6021 -> Patchwork_12923

  CI_DRM_6021: 850aa4220e8bf7609b03bf89bce146305704bec6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4971: fc5e0467eb6913d21ad932aa8a31c77fdb5a9c77 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12923: 35ad13519e877872c7a4ad93771292cd66feea68 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

35ad13519e87 drm/i915: Convert inconsistent static engine tables into an init error
668dbfecdb9d drm/i915: Bump signaler priority on adding a waiter
df05186878fd drm/i915: Pass i915_sched_node around internally
f1fd7b854665 drm/i915: Rearrange i915_scheduler.c
a32b90b936b0 drm/i915/execlists: Don't apply priority boost for resets
aee38ba394c3 drm/i915: Delay semaphore submission until the start of the signaler
f0c2161e845c drm/i915: Only reschedule the submission tasklet if preemption is possible
05cfaf6dba73 drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
cc96a6c8aff1 drm/i915: Cancel retire_worker on parking
3284a2d44ffa drm/i915: Remove delay for idle_work
d3716ba300d9 drm/i915: Leave engine parking to the engines
459db28e0e30 drm/i915/execlists: Flush the tasklet on parking
c29bbd2adbe4 drm/i915: Include fence signaled bit in print_request()
6e3d926bf23b drm/i915/hangcheck: Track context changes

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12923/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [PATCH] drm/i915: Bump signaler priority on adding a waiter
  2019-05-01 11:45 ` [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter Chris Wilson
@ 2019-05-01 14:59   ` Chris Wilson
  2019-05-01 16:29   ` [PATCH v2] " Chris Wilson
  2019-05-01 16:32   ` Chris Wilson
  2 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 14:59 UTC (permalink / raw)
  To: intel-gfx

The handling of the no-preemption priority level imposes the restriction
that we need to maintain the implied ordering even though preemption is
disabled. Otherwise we may end up with an AB-BA deadlock across multiple
engines due to a real preemption event reordering the no-preemption
WAITs. To resolve this issue we currently promote all requests to WAIT
on unsubmission; however, this interferes with the timeslicing
requirement that we do not apply any implicit promotion that will defeat
the round-robin timeslice list. (If we automatically promote the active
request it will go back to the head of the queue and not the tail!)

So we need implicit promotion to prevent reordering around semaphores
where we are not allowed to preempt, and we must avoid implicit
promotion on unsubmission. So instead, if we apply that implicit
promotion when adding the dependency rather than at unsubmit, we avoid
the semaphore
deadlock and we also reduce the gains made by the promotion for user
space waiting. Furthermore, by keeping the earlier dependencies at a
higher level, we reduce the search space for timeslicing without
altering runtime scheduling too badly (no dependencies at all will be
assigned a higher priority for rrul).

Testcase: igt/gem_concurrent_blit
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 12 ++++++++----
 drivers/gpu/drm/i915/i915_request.c    |  9 ---------
 drivers/gpu/drm/i915/i915_scheduler.c  |  9 +++++++++
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 4b042893dc0e..5b3d8e33f1cf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -98,12 +98,14 @@ static int live_busywait_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_unlock;
-	ctx_hi->sched.priority = INT_MAX;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = INT_MIN;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
@@ -958,12 +960,14 @@ static int live_preempt_hang(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8cb3ed5531e3..065da1bcbb4c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -468,15 +468,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	/*
-	 * As we do not allow WAIT to preempt inflight requests,
-	 * once we have executed a request, along with triggering
-	 * any execution callbacks, we must preserve its ordering
-	 * within the non-preemptible FIFO.
-	 */
-	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
-	request->sched.attr.priority |= __NO_PREEMPTION;
-
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 05eb50558aba..ecc3e83ef28d 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -388,6 +388,15 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		    !node_started(signal))
 			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 
+		/*
+		 * As we do not allow WAIT to preempt inflight requests,
+		 * once we have executed a request, along with triggering
+		 * any execution callbacks, we must preserve its ordering
+		 * within the non-preemptible FIFO.
+		 */
+		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+		__bump_priority(signal, __NO_PREEMPTION);
+
 		ret = true;
 	}
 
-- 
2.20.1



* ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (14 preceding siblings ...)
  2019-05-01 12:47 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-05-01 15:22 ` Patchwork
  2019-05-01 15:35 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 15:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2)
URL   : https://patchwork.freedesktop.org/series/60153/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/hangcheck: Track context changes
Okay!

Commit: drm/i915: Include fence signaled bit in print_request()
Okay!

Commit: drm/i915/execlists: Flush the tasklet on parking
Okay!

Commit: drm/i915: Leave engine parking to the engines
Okay!

Commit: drm/i915: Remove delay for idle_work
Okay!

Commit: drm/i915: Cancel retire_worker on parking
Okay!

Commit: drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
Okay!

Commit: drm/i915: Only reschedule the submission tasklet if preemption is possible
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)

Commit: drm/i915: Delay semaphore submission until the start of the signaler
Okay!

Commit: drm/i915/execlists: Don't apply priority boost for resets
Okay!

Commit: drm/i915: Rearrange i915_scheduler.c
Okay!

Commit: drm/i915: Pass i915_sched_node around internally
Okay!

Commit: drm/i915: Bump signaler priority on adding a waiter
Okay!

Commit: drm/i915: Convert inconsistent static engine tables into an init error
Okay!



* ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (15 preceding siblings ...)
  2019-05-01 15:22 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2) Patchwork
@ 2019-05-01 15:35 ` Patchwork
  2019-05-01 16:42 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3) Patchwork
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 15:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2)
URL   : https://patchwork.freedesktop.org/series/60153/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6021 -> Patchwork_12925
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60153/revisions/2/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12925 that come from known issues:

### IGT changes ###

#### Possible fixes ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-blb-e6850:       [INCOMPLETE][1] ([fdo#107718]) -> [PASS][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12925/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Warnings ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-kbl-guc:         [SKIP][3] ([fdo#109271]) -> [INCOMPLETE][4] ([fdo#107807])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12925/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html

  
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271


Participating hosts (52 -> 46)
------------------------------

  Additional (1): fi-pnv-d510 
  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6021 -> Patchwork_12925

  CI_DRM_6021: 850aa4220e8bf7609b03bf89bce146305704bec6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4971: fc5e0467eb6913d21ad932aa8a31c77fdb5a9c77 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12925: 44d3f4a1fc72b988930cbbc495db611d0ff07d2e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

44d3f4a1fc72 drm/i915: Convert inconsistent static engine tables into an init error
72d20cf4bf9d drm/i915: Bump signaler priority on adding a waiter
eb5a92c844ee drm/i915: Pass i915_sched_node around internally
e66579587990 drm/i915: Rearrange i915_scheduler.c
4e8130e19b73 drm/i915/execlists: Don't apply priority boost for resets
294ff64dcdc3 drm/i915: Delay semaphore submission until the start of the signaler
ee299f13ddc6 drm/i915: Only reschedule the submission tasklet if preemption is possible
10ed7d87fab6 drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
07be304f9b61 drm/i915: Cancel retire_worker on parking
6215a5f82de4 drm/i915: Remove delay for idle_work
e6839ab1c412 drm/i915: Leave engine parking to the engines
0643d22dc542 drm/i915/execlists: Flush the tasklet on parking
8d54e3388676 drm/i915: Include fence signaled bit in print_request()
f55b819edc02 drm/i915/hangcheck: Track context changes

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12925/


* [PATCH v2] drm/i915: Bump signaler priority on adding a waiter
  2019-05-01 11:45 ` [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter Chris Wilson
  2019-05-01 14:59   ` [PATCH] " Chris Wilson
@ 2019-05-01 16:29   ` Chris Wilson
  2019-05-01 16:32   ` Chris Wilson
  2 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 16:29 UTC (permalink / raw)
  To: intel-gfx

The handling of the no-preemption priority level imposes the restriction
that we need to maintain the implied ordering even though preemption is
disabled. Otherwise we may end up with an AB-BA deadlock across multiple
engines due to a real preemption event reordering the no-preemption
WAITs. To resolve this issue we currently promote all requests to WAIT
on unsubmission; however, this interferes with the timeslicing
requirement that we do not apply any implicit promotion that will defeat
the round-robin timeslice list. (If we automatically promote the active
request it will go back to the head of the queue and not the tail!)

So we need implicit promotion to prevent reordering around semaphores
where we are not allowed to preempt, and we must avoid implicit
promotion on unsubmission. So instead, if we apply that implicit
promotion when adding the dependency rather than at unsubmit, we avoid
the semaphore
deadlock and we also reduce the gains made by the promotion for user
space waiting. Furthermore, by keeping the earlier dependencies at a
higher level, we reduce the search space for timeslicing without
altering runtime scheduling too badly (no dependencies at all will be
assigned a higher priority for rrul).

v2: Limit the bump to external edges (as originally intended), i.e.
between contexts and out to the user.

Testcase: igt/gem_concurrent_blit
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c      | 12 ++++++++----
 drivers/gpu/drm/i915/i915_request.c         | 13 +++----------
 drivers/gpu/drm/i915/i915_scheduler.c       | 15 +++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h       |  3 ++-
 drivers/gpu/drm/i915/i915_scheduler_types.h |  3 ++-
 5 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 4b042893dc0e..5b3d8e33f1cf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -98,12 +98,14 @@ static int live_busywait_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_unlock;
-	ctx_hi->sched.priority = INT_MAX;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = INT_MIN;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
@@ -958,12 +960,14 @@ static int live_preempt_hang(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8cb3ed5531e3..1a04894a904b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -468,15 +468,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	/*
-	 * As we do not allow WAIT to preempt inflight requests,
-	 * once we have executed a request, along with triggering
-	 * any execution callbacks, we must preserve its ordering
-	 * within the non-preemptible FIFO.
-	 */
-	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
-	request->sched.attr.priority |= __NO_PREEMPTION;
-
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
 
@@ -861,7 +852,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		return 0;
 
 	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
+		ret = i915_sched_node_add_dependency(&to->sched,
+						     &from->sched,
+						     I915_DEPENDENCY_EXTERNAL);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 05eb50558aba..319eb8703451 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -388,6 +388,16 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		    !node_started(signal))
 			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 
+		/*
+		 * As we do not allow WAIT to preempt inflight requests,
+		 * once we have executed a request, along with triggering
+		 * any execution callbacks, we must preserve its ordering
+		 * within the non-preemptible FIFO.
+		 */
+		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+		if (flags & I915_DEPENDENCY_EXTERNAL)
+			__bump_priority(signal, __NO_PREEMPTION);
+
 		ret = true;
 	}
 
@@ -397,7 +407,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 }
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-				   struct i915_sched_node *signal)
+				   struct i915_sched_node *signal,
+				   unsigned long flags)
 {
 	struct i915_dependency *dep;
 
@@ -406,7 +417,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 		return -ENOMEM;
 
 	if (!__i915_sched_node_add_dependency(node, signal, dep,
-					      I915_DEPENDENCY_ALLOC))
+					      flags | I915_DEPENDENCY_ALLOC))
 		i915_dependency_free(dep);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7eefccff39bf..fc7b6baa1355 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -33,7 +33,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 				      unsigned long flags);
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-				   struct i915_sched_node *signal);
+				   struct i915_sched_node *signal,
+				   unsigned long flags);
 
 void i915_sched_node_fini(struct i915_sched_node *node);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 166a457884b2..3e309631bd0b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -66,7 +66,8 @@ struct i915_dependency {
 	struct list_head wait_link;
 	struct list_head dfs_link;
 	unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
+#define I915_DEPENDENCY_ALLOC		BIT(0)
+#define I915_DEPENDENCY_EXTERNAL	BIT(1)
 };
 
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1



* [PATCH v2] drm/i915: Bump signaler priority on adding a waiter
  2019-05-01 11:45 ` [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter Chris Wilson
  2019-05-01 14:59   ` [PATCH] " Chris Wilson
  2019-05-01 16:29   ` [PATCH v2] " Chris Wilson
@ 2019-05-01 16:32   ` Chris Wilson
  2 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-01 16:32 UTC (permalink / raw)
  To: intel-gfx

The handling of the no-preemption priority level imposes the restriction
that we need to maintain the implied ordering even though preemption is
disabled. Otherwise we may end up with an AB-BA deadlock across multiple
engines due to a real preemption event reordering the no-preemption
WAITs. To resolve this issue we currently promote all requests to WAIT
on unsubmission; however, this interferes with the timeslicing
requirement that we do not apply any implicit promotion that will defeat
the round-robin timeslice list. (If we automatically promote the active
request it will go back to the head of the queue and not the tail!)

So we need implicit promotion to prevent reordering around semaphores
where we are not allowed to preempt, and we must avoid implicit
promotion on unsubmission. So instead, if we apply that implicit
promotion when adding the dependency rather than at unsubmit, we avoid
the semaphore
deadlock and we also reduce the gains made by the promotion for user
space waiting. Furthermore, by keeping the earlier dependencies at a
higher level, we reduce the search space for timeslicing without
altering runtime scheduling too badly (no dependencies at all will be
assigned a higher priority for rrul).

v2: Limit the bump to external edges (as originally intended), i.e.
between contexts and out to the user.

Testcase: igt/gem_concurrent_blit
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c      | 12 ++++++++----
 drivers/gpu/drm/i915/i915_request.c         |  9 ---------
 drivers/gpu/drm/i915/i915_scheduler.c       | 11 +++++++++++
 drivers/gpu/drm/i915/i915_scheduler_types.h |  3 ++-
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 4b042893dc0e..5b3d8e33f1cf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -98,12 +98,14 @@ static int live_busywait_preempt(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_unlock;
-	ctx_hi->sched.priority = INT_MAX;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = INT_MIN;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
@@ -958,12 +960,14 @@ static int live_preempt_hang(void *arg)
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
-	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
 	ctx_lo = kernel_context(i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
-	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8cb3ed5531e3..065da1bcbb4c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -468,15 +468,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	/*
-	 * As we do not allow WAIT to preempt inflight requests,
-	 * once we have executed a request, along with triggering
-	 * any execution callbacks, we must preserve its ordering
-	 * within the non-preemptible FIFO.
-	 */
-	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
-	request->sched.attr.priority |= __NO_PREEMPTION;
-
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
 		i915_request_cancel_breadcrumb(request);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 05eb50558aba..ea0ab0c8f571 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -388,6 +388,16 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		    !node_started(signal))
 			node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 
+		/*
+		 * As we do not allow WAIT to preempt inflight requests,
+		 * once we have executed a request, along with triggering
+		 * any execution callbacks, we must preserve its ordering
+		 * within the non-preemptible FIFO.
+		 */
+		BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+		if (flags & I915_DEPENDENCY_EXTERNAL)
+			__bump_priority(signal, __NO_PREEMPTION);
+
 		ret = true;
 	}
 
@@ -406,6 +416,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 		return -ENOMEM;
 
 	if (!__i915_sched_node_add_dependency(node, signal, dep,
+					      I915_DEPENDENCY_EXTERNAL |
 					      I915_DEPENDENCY_ALLOC))
 		i915_dependency_free(dep);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 166a457884b2..3e309631bd0b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -66,7 +66,8 @@ struct i915_dependency {
 	struct list_head wait_link;
 	struct list_head dfs_link;
 	unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
+#define I915_DEPENDENCY_ALLOC		BIT(0)
+#define I915_DEPENDENCY_EXTERNAL	BIT(1)
 };
 
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (16 preceding siblings ...)
  2019-05-01 15:35 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-05-01 16:42 ` Patchwork
  2019-05-01 16:52 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (5 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 16:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3)
URL   : https://patchwork.freedesktop.org/series/60153/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/hangcheck: Track context changes
Okay!

Commit: drm/i915: Include fence signaled bit in print_request()
Okay!

Commit: drm/i915/execlists: Flush the tasklet on parking
Okay!

Commit: drm/i915: Leave engine parking to the engines
Okay!

Commit: drm/i915: Remove delay for idle_work
Okay!

Commit: drm/i915: Cancel retire_worker on parking
Okay!

Commit: drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
Okay!

Commit: drm/i915: Only reschedule the submission tasklet if preemption is possible
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)

Commit: drm/i915: Delay semaphore submission until the start of the signaler
Okay!

Commit: drm/i915/execlists: Don't apply priority boost for resets
Okay!

Commit: drm/i915: Rearrange i915_scheduler.c
Okay!

Commit: drm/i915: Pass i915_sched_node around internally
Okay!

Commit: drm/i915: Bump signaler priority on adding a waiter
Okay!

Commit: drm/i915: Convert inconsistent static engine tables into an init error
Okay!


* ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (17 preceding siblings ...)
  2019-05-01 16:42 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3) Patchwork
@ 2019-05-01 16:52 ` Patchwork
  2019-05-01 17:01 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4) Patchwork
                   ` (4 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 16:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3)
URL   : https://patchwork.freedesktop.org/series/60153/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6021 -> Patchwork_12927
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60153/revisions/3/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12927 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][1] -> [DMESG-WARN][2] ([fdo#105541])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12927/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  
#### Possible fixes ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-blb-e6850:       [INCOMPLETE][3] ([fdo#107718]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12927/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Warnings ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-kbl-guc:         [SKIP][5] ([fdo#109271]) -> [INCOMPLETE][6] ([fdo#107807])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12927/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html

  
  [fdo#105541]: https://bugs.freedesktop.org/show_bug.cgi?id=105541
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271


Participating hosts (52 -> 46)
------------------------------

  Additional (1): fi-pnv-d510 
  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6021 -> Patchwork_12927

  CI_DRM_6021: 850aa4220e8bf7609b03bf89bce146305704bec6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4971: fc5e0467eb6913d21ad932aa8a31c77fdb5a9c77 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12927: aa242739648eef99230bfe29776b2b5a8dfb0953 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

aa242739648e drm/i915: Convert inconsistent static engine tables into an init error
acea16a2683b drm/i915: Bump signaler priority on adding a waiter
4d93039126e6 drm/i915: Pass i915_sched_node around internally
237dea2e6369 drm/i915: Rearrange i915_scheduler.c
9fb71130c987 drm/i915/execlists: Don't apply priority boost for resets
89b9e63d0de9 drm/i915: Delay semaphore submission until the start of the signaler
aab09ac1c0d0 drm/i915: Only reschedule the submission tasklet if preemption is possible
c04e7edf12e3 drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
0c4ed77e0ff2 drm/i915: Cancel retire_worker on parking
1373aa28b84f drm/i915: Remove delay for idle_work
9a055938ab47 drm/i915: Leave engine parking to the engines
bfb97e815599 drm/i915/execlists: Flush the tasklet on parking
abdf4d8765ad drm/i915: Include fence signaled bit in print_request()
cecc697e31f0 drm/i915/hangcheck: Track context changes

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12927/

* ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (18 preceding siblings ...)
  2019-05-01 16:52 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-05-01 17:01 ` Patchwork
  2019-05-01 17:37 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (3 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 17:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4)
URL   : https://patchwork.freedesktop.org/series/60153/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/hangcheck: Track context changes
Okay!

Commit: drm/i915: Include fence signaled bit in print_request()
Okay!

Commit: drm/i915/execlists: Flush the tasklet on parking
Okay!

Commit: drm/i915: Leave engine parking to the engines
Okay!

Commit: drm/i915: Remove delay for idle_work
Okay!

Commit: drm/i915: Cancel retire_worker on parking
Okay!

Commit: drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
Okay!

Commit: drm/i915: Only reschedule the submission tasklet if preemption is possible
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)

Commit: drm/i915: Delay semaphore submission until the start of the signaler
Okay!

Commit: drm/i915/execlists: Don't apply priority boost for resets
Okay!

Commit: drm/i915: Rearrange i915_scheduler.c
Okay!

Commit: drm/i915: Pass i915_sched_node around internally
Okay!

Commit: drm/i915: Bump signaler priority on adding a waiter
Okay!

Commit: drm/i915: Convert inconsistent static engine tables into an init error
Okay!


* ✓ Fi.CI.BAT: success for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (19 preceding siblings ...)
  2019-05-01 17:01 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4) Patchwork
@ 2019-05-01 17:37 ` Patchwork
  2019-05-02  9:35 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (2 subsequent siblings)
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-01 17:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4)
URL   : https://patchwork.freedesktop.org/series/60153/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6021 -> Patchwork_12928
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60153/revisions/4/mbox/

Known issues
------------

  Here are the changes found in Patchwork_12928 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       [PASS][1] -> [INCOMPLETE][2] ([fdo#108602] / [fdo#108744])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html

  
#### Possible fixes ####

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-blb-e6850:       [INCOMPLETE][3] ([fdo#107718]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Warnings ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-kbl-guc:         [SKIP][5] ([fdo#109271]) -> [INCOMPLETE][6] ([fdo#107807])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/fi-kbl-guc/igt@i915_pm_rpm@basic-pci-d3-state.html

  
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271


Participating hosts (52 -> 46)
------------------------------

  Additional (1): fi-pnv-d510 
  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6021 -> Patchwork_12928

  CI_DRM_6021: 850aa4220e8bf7609b03bf89bce146305704bec6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4971: fc5e0467eb6913d21ad932aa8a31c77fdb5a9c77 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12928: 01f589b56645790b5775e319e3f4ff827e465555 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

01f589b56645 drm/i915: Convert inconsistent static engine tables into an init error
f2d6eddeb32d drm/i915: Bump signaler priority on adding a waiter
5218802aa1f6 drm/i915: Pass i915_sched_node around internally
026843605d46 drm/i915: Rearrange i915_scheduler.c
cf13ef929e7c drm/i915/execlists: Don't apply priority boost for resets
5b463d70273d drm/i915: Delay semaphore submission until the start of the signaler
2f91bc3c1649 drm/i915: Only reschedule the submission tasklet if preemption is possible
5bd685806aa5 drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
639f5996c165 drm/i915: Cancel retire_worker on parking
79fa465f1d46 drm/i915: Remove delay for idle_work
fcabf363d051 drm/i915: Leave engine parking to the engines
3067553e8d99 drm/i915/execlists: Flush the tasklet on parking
784762bc47d9 drm/i915: Include fence signaled bit in print_request()
d288339c7974 drm/i915/hangcheck: Track context changes

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/

* ✗ Fi.CI.IGT: failure for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (20 preceding siblings ...)
  2019-05-01 17:37 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-05-02  9:35 ` Patchwork
  2019-05-02 16:45 ` ✗ Fi.CI.BAT: failure for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev5) Patchwork
  2019-05-03 10:36 ` [PATCH 01/14] drm/i915/hangcheck: Track context changes Tvrtko Ursulin
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-02  9:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4)
URL   : https://patchwork.freedesktop.org/series/60153/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6021_full -> Patchwork_12928_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12928_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12928_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12928_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ppgtt@flink-and-close-vma-leak:
    - shard-iclb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb5/igt@gem_ppgtt@flink-and-close-vma-leak.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb7/igt@gem_ppgtt@flink-and-close-vma-leak.html

  * igt@i915_pm_rpm@cursor:
    - shard-iclb:         [PASS][3] -> [DMESG-WARN][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb3/igt@i915_pm_rpm@cursor.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb4/igt@i915_pm_rpm@cursor.html

  
Known issues
------------

  Here are the changes found in Patchwork_12928_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_suspend@debugfs-reader:
    - shard-apl:          [PASS][5] -> [DMESG-WARN][6] ([fdo#108566]) +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-apl8/igt@i915_suspend@debugfs-reader.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-apl2/igt@i915_suspend@debugfs-reader.html

  * igt@kms_flip@2x-flip-vs-expired-vblank:
    - shard-glk:          [PASS][7] -> [FAIL][8] ([fdo#105363])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-glk3/igt@kms_flip@2x-flip-vs-expired-vblank.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank.html

  * igt@kms_flip@bo-too-big:
    - shard-skl:          [PASS][9] -> [INCOMPLETE][10] ([fdo#110581])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl4/igt@kms_flip@bo-too-big.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl2/igt@kms_flip@bo-too-big.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-iclb:         [PASS][11] -> [INCOMPLETE][12] ([fdo#107713] / [fdo#110581]) +1 similar issue
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb2/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb4/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-hsw:          [PASS][13] -> [INCOMPLETE][14] ([fdo#103540] / [fdo#110581]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-hsw6/igt@kms_flip@flip-vs-suspend.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-hsw1/igt@kms_flip@flip-vs-suspend.html
    - shard-skl:          [PASS][15] -> [INCOMPLETE][16] ([fdo#109507] / [fdo#110581])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl9/igt@kms_flip@flip-vs-suspend.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl5/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip_tiling@flip-to-x-tiled:
    - shard-iclb:         [PASS][17] -> [FAIL][18] ([fdo#108134])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb1/igt@kms_flip_tiling@flip-to-x-tiled.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb6/igt@kms_flip_tiling@flip-to-x-tiled.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite:
    - shard-iclb:         [PASS][19] -> [FAIL][20] ([fdo#103167]) +5 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb7/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@psr-suspend:
    - shard-skl:          [PASS][21] -> [INCOMPLETE][22] ([fdo#104108] / [fdo#106978] / [fdo#110581])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl10/igt@kms_frontbuffer_tracking@psr-suspend.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl8/igt@kms_frontbuffer_tracking@psr-suspend.html

  * igt@kms_plane@pixel-format-pipe-b-planes:
    - shard-glk:          [PASS][23] -> [SKIP][24] ([fdo#109271])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-glk9/igt@kms_plane@pixel-format-pipe-b-planes.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-glk4/igt@kms_plane@pixel-format-pipe-b-planes.html

  * igt@kms_plane@pixel-format-pipe-c-planes-source-clamping:
    - shard-iclb:         [PASS][25] -> [INCOMPLETE][26] ([fdo#107713] / [fdo#110036] / [fdo#110581])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb1/igt@kms_plane@pixel-format-pipe-c-planes-source-clamping.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb6/igt@kms_plane@pixel-format-pipe-c-planes-source-clamping.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [PASS][27] -> [FAIL][28] ([fdo#103166])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb3/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb4/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_plane_scaling@pipe-c-scaler-with-pixel-format:
    - shard-glk:          [PASS][29] -> [SKIP][30] ([fdo#109271] / [fdo#109278])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-glk9/igt@kms_plane_scaling@pipe-c-scaler-with-pixel-format.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-glk4/igt@kms_plane_scaling@pipe-c-scaler-with-pixel-format.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][31] -> [SKIP][32] ([fdo#109441]) +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb1/igt@kms_psr@psr2_cursor_render.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][33] -> [FAIL][34] ([fdo#99912])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-kbl3/igt@kms_setmode@basic.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-kbl3/igt@kms_setmode@basic.html

  * igt@tools_test@tools_test:
    - shard-apl:          [PASS][35] -> [SKIP][36] ([fdo#109271])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-apl7/igt@tools_test@tools_test.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-apl4/igt@tools_test@tools_test.html

  
#### Possible fixes ####

  * igt@gem_ctx_switch@basic-all-light:
    - shard-iclb:         [INCOMPLETE][37] ([fdo#107713] / [fdo#109100]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb1/igt@gem_ctx_switch@basic-all-light.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb3/igt@gem_ctx_switch@basic-all-light.html

  * igt@gem_exec_blt@dumb-buf-min:
    - shard-hsw:          [INCOMPLETE][39] ([fdo#103540]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-hsw6/igt@gem_exec_blt@dumb-buf-min.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-hsw4/igt@gem_exec_blt@dumb-buf-min.html

  * igt@gem_softpin@noreloc-s3:
    - shard-kbl:          [DMESG-WARN][41] ([fdo#108566]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-kbl1/igt@gem_softpin@noreloc-s3.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-kbl6/igt@gem_softpin@noreloc-s3.html

  * igt@gem_workarounds@suspend-resume:
    - shard-apl:          [DMESG-WARN][43] ([fdo#108566]) -> [PASS][44] +5 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-apl8/igt@gem_workarounds@suspend-resume.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-apl5/igt@gem_workarounds@suspend-resume.html

  * igt@i915_pm_rpm@system-suspend-execbuf:
    - shard-skl:          [INCOMPLETE][45] ([fdo#104108] / [fdo#107773] / [fdo#107807]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl10/igt@i915_pm_rpm@system-suspend-execbuf.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl2/igt@i915_pm_rpm@system-suspend-execbuf.html

  * igt@i915_selftest@mock_requests:
    - shard-skl:          [INCOMPLETE][47] ([fdo#110550]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl5/igt@i915_selftest@mock_requests.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl5/igt@i915_selftest@mock_requests.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-render-xtiled:
    - shard-skl:          [FAIL][49] ([fdo#103184] / [fdo#103232]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl10/igt@kms_draw_crc@draw-method-xrgb2101010-render-xtiled.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl8/igt@kms_draw_crc@draw-method-xrgb2101010-render-xtiled.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-gtt:
    - shard-iclb:         [FAIL][51] ([fdo#103167]) -> [PASS][52] +1 similar issue
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb2/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-gtt.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-gtt.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite:
    - shard-skl:          [FAIL][53] ([fdo#108040]) -> [PASS][54] +1 similar issue
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl10/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl8/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-move:
    - shard-skl:          [FAIL][55] ([fdo#103167]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl10/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-move.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-move.html

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          [FAIL][57] ([fdo#108145]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl10/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl8/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-iclb:         [FAIL][59] ([fdo#103166]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb6/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb8/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [FAIL][61] ([fdo#108341]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb1/igt@kms_psr@no_drrs.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb6/igt@kms_psr@no_drrs.html

  * igt@kms_setmode@basic:
    - shard-apl:          [FAIL][63] ([fdo#99912]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-apl3/igt@kms_setmode@basic.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-apl1/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-a-wait-busy:
    - shard-iclb:         [INCOMPLETE][65] ([fdo#107713]) -> [PASS][66]
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-iclb3/igt@kms_vblank@pipe-a-wait-busy.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-iclb4/igt@kms_vblank@pipe-a-wait-busy.html

  
#### Warnings ####

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-skl:          [INCOMPLETE][67] ([fdo#104108]) -> [INCOMPLETE][68] ([fdo#104108] / [fdo#110581])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl2/igt@kms_cursor_crc@cursor-256x256-suspend.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl6/igt@kms_cursor_crc@cursor-256x256-suspend.html

  * igt@kms_flip@2x-flip-vs-suspend:
    - shard-glk:          [INCOMPLETE][69] ([fdo#103359] / [k.org#198133]) -> [INCOMPLETE][70] ([fdo#103359] / [fdo#110581] / [k.org#198133])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-glk9/igt@kms_flip@2x-flip-vs-suspend.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-glk7/igt@kms_flip@2x-flip-vs-suspend.html

  * igt@kms_frontbuffer_tracking@psr-2p-scndscrn-spr-indfb-fullscreen:
    - shard-skl:          [SKIP][71] ([fdo#109271]) -> [INCOMPLETE][72] ([fdo#110581])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl8/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-spr-indfb-fullscreen.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl10/igt@kms_frontbuffer_tracking@psr-2p-scndscrn-spr-indfb-fullscreen.html

  * igt@kms_plane_scaling@pipe-b-scaler-with-clipping-clamping:
    - shard-skl:          [INCOMPLETE][73] ([fdo#108972]) -> [INCOMPLETE][74] ([fdo#108972] / [fdo#110581])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6021/shard-skl2/igt@kms_plane_scaling@pipe-b-scaler-with-clipping-clamping.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/shard-skl3/igt@kms_plane_scaling@pipe-b-scaler-with-clipping-clamping.html

  
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#106978]: https://bugs.freedesktop.org/show_bug.cgi?id=106978
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108134]: https://bugs.freedesktop.org/show_bug.cgi?id=108134
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108341]: https://bugs.freedesktop.org/show_bug.cgi?id=108341
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108972]: https://bugs.freedesktop.org/show_bug.cgi?id=108972
  [fdo#109100]: https://bugs.freedesktop.org/show_bug.cgi?id=109100
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109507]: https://bugs.freedesktop.org/show_bug.cgi?id=109507
  [fdo#110036]: https://bugs.freedesktop.org/show_bug.cgi?id=110036
  [fdo#110550]: https://bugs.freedesktop.org/show_bug.cgi?id=110550
  [fdo#110581]: https://bugs.freedesktop.org/show_bug.cgi?id=110581
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_6021 -> Patchwork_12928

  CI_DRM_6021: 850aa4220e8bf7609b03bf89bce146305704bec6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4971: fc5e0467eb6913d21ad932aa8a31c77fdb5a9c77 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12928: 01f589b56645790b5775e319e3f4ff827e465555 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12928/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 05/14] drm/i915: Remove delay for idle_work
  2019-05-01 11:45 ` [PATCH 05/14] drm/i915: Remove delay for idle_work Chris Wilson
@ 2019-05-02 13:19   ` Tvrtko Ursulin
  2019-05-02 13:22     ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 13:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> The original intent for the delay before running the idle_work was to
> provide a hysteresis to avoid ping-ponging the device runtime-pm. Since
> then we have also pulled in some memory management and general device
> management for parking. But with the inversion of the wakeref handling,
> GEM is no longer responsible for the wakeref and by the time we call the
> idle_work, the device is asleep. It seems appropriate now to drop the
> delay and just run the worker immediately to flush the cached GEM state
> before sleeping.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c           |  2 +-
>   drivers/gpu/drm/i915/i915_drv.h               |  2 +-
>   drivers/gpu/drm/i915/i915_gem_pm.c            | 21 +++++++------------
>   .../gpu/drm/i915/selftests/i915_gem_object.c  |  2 +-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 ++--
>   5 files changed, 12 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0e4dffcd4da4..7e8898d0b78b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3935,8 +3935,8 @@ i915_drop_caches_set(void *data, u64 val)
>   	if (val & DROP_IDLE) {
>   		do {
>   			flush_delayed_work(&i915->gem.retire_work);
> -			drain_delayed_work(&i915->gem.idle_work);
>   		} while (READ_ONCE(i915->gt.awake));
> +		flush_work(&i915->gem.idle_work);
>   	}
>   
>   	if (val & DROP_FREED)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 13270e19eb87..cbf4a7d8bdae 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2035,7 +2035,7 @@ struct drm_i915_private {
>   		 * arrive within a small period of time, we fire
>   		 * off the idle_work.
>   		 */
> -		struct delayed_work idle_work;
> +		struct work_struct idle_work;
>   	} gem;
>   
>   	/* For i945gm vblank irq vs. C3 workaround */
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> index 49b0ce594f20..ae91ad7cb31e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> @@ -29,12 +29,12 @@ static void i915_gem_park(struct drm_i915_private *i915)
>   static void idle_work_handler(struct work_struct *work)
>   {
>   	struct drm_i915_private *i915 =
> -		container_of(work, typeof(*i915), gem.idle_work.work);
> +		container_of(work, typeof(*i915), gem.idle_work);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
>   	intel_wakeref_lock(&i915->gt.wakeref);
> -	if (!intel_wakeref_active(&i915->gt.wakeref))
> +	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))

What is the reason for the !work_pending check?

Regards,

Tvrtko

>   		i915_gem_park(i915);
>   	intel_wakeref_unlock(&i915->gt.wakeref);
>   
> @@ -74,9 +74,7 @@ static int pm_notifier(struct notifier_block *nb,
>   		break;
>   
>   	case INTEL_GT_PARK:
> -		mod_delayed_work(i915->wq,
> -				 &i915->gem.idle_work,
> -				 msecs_to_jiffies(100));
> +		queue_work(i915->wq, &i915->gem.idle_work);
>   		break;
>   	}
>   
> @@ -142,16 +140,11 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 * Assert that we successfully flushed all the work and
>   	 * reset the GPU back to its idle, low power state.
>   	 */
> -	GEM_BUG_ON(i915->gt.awake);
> -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
> -
>   	drain_delayed_work(&i915->gem.retire_work);
> +	GEM_BUG_ON(i915->gt.awake);
> +	flush_work(&i915->gem.idle_work);
>   
> -	/*
> -	 * As the idle_work is rearming if it detects a race, play safe and
> -	 * repeat the flush until it is definitely idle.
> -	 */
> -	drain_delayed_work(&i915->gem.idle_work);
> +	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>   
>   	i915_gem_drain_freed_objects(i915);
>   
> @@ -242,7 +235,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   
>   void i915_gem_init__pm(struct drm_i915_private *i915)
>   {
> -	INIT_DELAYED_WORK(&i915->gem.idle_work, idle_work_handler);
> +	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
>   	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
>   
>   	i915->gem.pm_notifier.notifier_call = pm_notifier;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 088b2aa05dcd..b926d1cd165d 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -509,7 +509,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
>   	intel_gt_pm_get(i915);
>   
>   	cancel_delayed_work_sync(&i915->gem.retire_work);
> -	cancel_delayed_work_sync(&i915->gem.idle_work);
> +	flush_work(&i915->gem.idle_work);
>   }
>   
>   static void restore_retire_worker(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index e4033d0576c4..d919f512042c 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -59,7 +59,7 @@ static void mock_device_release(struct drm_device *dev)
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	drain_delayed_work(&i915->gem.retire_work);
> -	drain_delayed_work(&i915->gem.idle_work);
> +	flush_work(&i915->gem.idle_work);
>   	i915_gem_drain_workqueue(i915);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> @@ -195,7 +195,7 @@ struct drm_i915_private *mock_gem_device(void)
>   	mock_init_contexts(i915);
>   
>   	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
> -	INIT_DELAYED_WORK(&i915->gem.idle_work, mock_idle_work_handler);
> +	INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler);
>   
>   	i915->gt.awake = true;
>   
> 

* Re: [PATCH 05/14] drm/i915: Remove delay for idle_work
  2019-05-02 13:19   ` Tvrtko Ursulin
@ 2019-05-02 13:22     ` Chris Wilson
  2019-05-02 13:51       ` Tvrtko Ursulin
  0 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 13:22 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 14:19:38)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> > index 49b0ce594f20..ae91ad7cb31e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> > @@ -29,12 +29,12 @@ static void i915_gem_park(struct drm_i915_private *i915)
> >   static void idle_work_handler(struct work_struct *work)
> >   {
> >       struct drm_i915_private *i915 =
> > -             container_of(work, typeof(*i915), gem.idle_work.work);
> > +             container_of(work, typeof(*i915), gem.idle_work);
> >   
> >       mutex_lock(&i915->drm.struct_mutex);
> >   
> >       intel_wakeref_lock(&i915->gt.wakeref);
> > -     if (!intel_wakeref_active(&i915->gt.wakeref))
> > +     if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
> 
> What is the reason for the !work_pending check?

Just that we are going to be called again, so wait until the next time to
see if we still need to park.
-Chris

* Re: [PATCH 06/14] drm/i915: Cancel retire_worker on parking
  2019-05-01 11:45 ` [PATCH 06/14] drm/i915: Cancel retire_worker on parking Chris Wilson
@ 2019-05-02 13:29   ` Tvrtko Ursulin
  2019-05-02 13:33     ` Chris Wilson
  2019-05-02 14:29   ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 13:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> Replace the racy continuation check within retire_work with a definite
> kill-switch on idling. The race was being exposed by gem_concurrent_blit
> where the retire_worker would be terminated too early leaving us
> spinning in debugfs/i915_drop_caches with nothing flushing the
> retirement queue.
> 
> Although the fact that the igt is trying to idle from one child while
> submitting from another may be a contributing factor as to why it runs
> so slowly...
> 
> Testcase: igt/gem_concurrent_blit
> Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
>   .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
>   2 files changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> index ae91ad7cb31e..b239b55f84cd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> @@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
>   {
>   	struct drm_i915_private *i915 =
>   		container_of(work, typeof(*i915), gem.idle_work);
> +	bool restart = true;
>   
> +	cancel_delayed_work_sync(&i915->gem.retire_work);
>   	mutex_lock(&i915->drm.struct_mutex);
>   

You don't want to run another retire here? Since the retire worker might 
have just been canceled I thought you should.

Regards,

Tvrtko

>   	intel_wakeref_lock(&i915->gt.wakeref);
> -	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
> +	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) {
>   		i915_gem_park(i915);
> +		restart = false;
> +	}
>   	intel_wakeref_unlock(&i915->gt.wakeref);
>   
>   	mutex_unlock(&i915->drm.struct_mutex);
> +	if (restart)
> +		queue_delayed_work(i915->wq,
> +				   &i915->gem.retire_work,
> +				   round_jiffies_up_relative(HZ));
>   }
>   
>   static void retire_work_handler(struct work_struct *work)
> @@ -52,10 +60,9 @@ static void retire_work_handler(struct work_struct *work)
>   		mutex_unlock(&i915->drm.struct_mutex);
>   	}
>   
> -	if (intel_wakeref_active(&i915->gt.wakeref))
> -		queue_delayed_work(i915->wq,
> -				   &i915->gem.retire_work,
> -				   round_jiffies_up_relative(HZ));
> +	queue_delayed_work(i915->wq,
> +			   &i915->gem.retire_work,
> +			   round_jiffies_up_relative(HZ));
>   }
>   
>   static int pm_notifier(struct notifier_block *nb,
> @@ -140,7 +147,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
>   	 * Assert that we successfully flushed all the work and
>   	 * reset the GPU back to its idle, low power state.
>   	 */
> -	drain_delayed_work(&i915->gem.retire_work);
>   	GEM_BUG_ON(i915->gt.awake);
>   	flush_work(&i915->gem.idle_work);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index d919f512042c..9fd02025d382 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -58,7 +58,6 @@ static void mock_device_release(struct drm_device *dev)
>   	i915_gem_contexts_lost(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
> -	drain_delayed_work(&i915->gem.retire_work);
>   	flush_work(&i915->gem.idle_work);
>   	i915_gem_drain_workqueue(i915);
>   
> 

* Re: [PATCH 06/14] drm/i915: Cancel retire_worker on parking
  2019-05-02 13:29   ` Tvrtko Ursulin
@ 2019-05-02 13:33     ` Chris Wilson
  2019-05-02 14:20       ` Tvrtko Ursulin
  0 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 13:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 14:29:50)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > Replace the racy continuation check within retire_work with a definite
> > kill-switch on idling. The race was being exposed by gem_concurrent_blit
> > where the retire_worker would be terminated too early leaving us
> > spinning in debugfs/i915_drop_caches with nothing flushing the
> > retirement queue.
> > 
> > Although the fact that the igt is trying to idle from one child while
> > submitting from another may be a contributing factor as to why it runs
> > so slowly...
> > 
> > Testcase: igt/gem_concurrent_blit
> > Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
> >   .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
> >   2 files changed, 12 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> > index ae91ad7cb31e..b239b55f84cd 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> > @@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
> >   {
> >       struct drm_i915_private *i915 =
> >               container_of(work, typeof(*i915), gem.idle_work);
> > +     bool restart = true;
> >   
> > +     cancel_delayed_work_sync(&i915->gem.retire_work);
> >       mutex_lock(&i915->drm.struct_mutex);
> >   
> 
> You don't want to run another retire here? Since the retire worker might 
> have just been canceled I thought you should.

Why though? If there are retires outstanding, we won't sleep and want to
defer parking until after the next cycle.
-Chris

* Re: [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
  2019-05-01 11:45 ` [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches) Chris Wilson
@ 2019-05-02 13:34   ` Tvrtko Ursulin
  2019-05-02 14:00     ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 13:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> If the user is racing a call to debugfs/i915_drop_caches with ongoing
> submission from another thread/process, we may never end up idling the
> GPU and be uninterruptibly spinning in debugfs/i915_drop_caches trying
> to catch an idle moment.
> 
> Just flush the work once, that should be enough to park the system under
> correct conditions. Outside of those we either have a driver bug or the
> user is racing themselves. Sadly, because the user may be provoking the
> unwanted situation we can't put a warn here to attract attention to a
> probable bug.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 4 +---
>   1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 7e8898d0b78b..2ecefacb1e66 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3933,9 +3933,7 @@ i915_drop_caches_set(void *data, u64 val)
>   	fs_reclaim_release(GFP_KERNEL);
>   
>   	if (val & DROP_IDLE) {
> -		do {
> -			flush_delayed_work(&i915->gem.retire_work);
> -		} while (READ_ONCE(i915->gt.awake));
> +		flush_delayed_work(&i915->gem.retire_work);
>   		flush_work(&i915->gem.idle_work);
>   	}
>   
> 

What were supposed to be semantics of DROP_IDLE? Now it seems rather 
weak. Should it for instance also imply DROP_ACTIVE?

Regards,

Tvrtko

* Re: [PATCH 02/14] drm/i915: Include fence signaled bit in print_request()
  2019-05-01 11:45 ` [PATCH 02/14] drm/i915: Include fence signaled bit in print_request() Chris Wilson
@ 2019-05-02 13:43   ` Tvrtko Ursulin
  0 siblings, 0 replies; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 13:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> Show the fence flags view of request completion in addition to the
> normal hwsp check and whether signaling is enabled.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 ++++-
>   1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 6e40f8ea9a6a..f178f1268f4e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1229,8 +1229,11 @@ static void print_request(struct drm_printer *m,
>   		   i915_request_completed(rq) ? "!" :
>   		   i915_request_started(rq) ? "*" :
>   		   "",
> +		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +			    &rq->fence.flags) ?  "+" :
>   		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> -			    &rq->fence.flags) ?  "+" : "",
> +			    &rq->fence.flags) ?  "-" :
> +		   "",
>   		   buf,
>   		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
>   		   name);
> 

We need a decoding cheat sheet now..

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Can we make it somewhat self-explanatory? Maybe only in my head..

">-|" (started/signaling enabled/completed)

">+" (started/signaled)

Imagine more fun if completed would be '<'. :)

Regards,

Tvrtko

* Re: [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-01 11:45 ` [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
@ 2019-05-02 13:48   ` Tvrtko Ursulin
  2019-05-02 13:53     ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 13:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> Tidy up the cleanup sequence by always ensuring that the tasklet is
> flushed on parking (before we cleanup). The parking provides a
> convenient point to ensure that the backend is truly idle.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
>   drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
>   2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 851e62ddcb87..7be54b868d8e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
>   	return i915_gem_render_state_emit(rq);
>   }
>   
> +static void execlists_park(struct intel_engine_cs *engine)
> +{
> +	tasklet_kill(&engine->execlists.tasklet);

Isn't it actually a problem if tasklet is scheduled and unstarted, or 
even in progress at the point of engine getting parked?

Regards,

Tvrtko

> +}
> +
>   void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   {
>   	engine->submit_request = execlists_submit_request;
> @@ -2342,7 +2347,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->reset.reset = execlists_reset;
>   	engine->reset.finish = execlists_reset_finish;
>   
> -	engine->park = NULL;
> +	engine->park = execlists_park;
>   	engine->unpark = NULL;
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 4c814344809c..ed94001028f2 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -1363,6 +1363,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv)
>   
>   static void guc_submission_park(struct intel_engine_cs *engine)
>   {
> +	tasklet_kill(&engine->execlists.tasklet);
>   	intel_engine_unpin_breadcrumbs_irq(engine);
>   	engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
>   }
> 

* Re: [PATCH 05/14] drm/i915: Remove delay for idle_work
  2019-05-02 13:22     ` Chris Wilson
@ 2019-05-02 13:51       ` Tvrtko Ursulin
  2019-05-02 14:23         ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 13:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/05/2019 14:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-02 14:19:38)
>>
>> On 01/05/2019 12:45, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
>>> index 49b0ce594f20..ae91ad7cb31e 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
>>> @@ -29,12 +29,12 @@ static void i915_gem_park(struct drm_i915_private *i915)
>>>    static void idle_work_handler(struct work_struct *work)
>>>    {
>>>        struct drm_i915_private *i915 =
>>> -             container_of(work, typeof(*i915), gem.idle_work.work);
>>> +             container_of(work, typeof(*i915), gem.idle_work);
>>>    
>>>        mutex_lock(&i915->drm.struct_mutex);
>>>    
>>>        intel_wakeref_lock(&i915->gt.wakeref);
>>> -     if (!intel_wakeref_active(&i915->gt.wakeref))
>>> +     if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
>>
>> What is the reason for the !work_pending check?
> 
> Just that we are going to be called again, so wait until the next time to
> see if we still need to park.

When does it get called again? If a whole new cycle of unpark-park 
happened before the previous park was able to finish?

Regards,

Tvrtko



* Re: [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-02 13:48   ` Tvrtko Ursulin
@ 2019-05-02 13:53     ` Chris Wilson
  2019-05-02 14:14       ` Tvrtko Ursulin
  0 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 13:53 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 14:48:18)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > Tidy up the cleanup sequence by always ensuring that the tasklet is
> > flushed on parking (before we cleanup). The parking provides a
> > convenient point to ensure that the backend is truly idle.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
> >   drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
> >   2 files changed, 7 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 851e62ddcb87..7be54b868d8e 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
> >       return i915_gem_render_state_emit(rq);
> >   }
> >   
> > +static void execlists_park(struct intel_engine_cs *engine)
> > +{
> > +     tasklet_kill(&engine->execlists.tasklet);
> 
> Isn't it actually a problem if tasklet is scheduled and unstarted, or 
> even in progress at the point of engine getting parked?

That would be a broken driver. :|

We must be quite sure that the engine isn't going to send an interrupt as
we are just about to drop the wakeref we need to service that interrupt.

tasklet_kill()
GEM_BUG_ON(engine->execlists.active);
-Chris

* Re: [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
  2019-05-02 13:34   ` Tvrtko Ursulin
@ 2019-05-02 14:00     ` Chris Wilson
  2019-05-02 14:16       ` Tvrtko Ursulin
  0 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 14:00 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 14:34:11)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > If the user is racing a call to debugfs/i915_drop_caches with ongoing
> > submission from another thread/process, we may never end up idling the
> > GPU and be uninterruptibly spinning in debugfs/i915_drop_caches trying
> > to catch an idle moment.
> > 
> > Just flush the work once, that should be enough to park the system under
> > correct conditions. Outside of those we either have a driver bug or the
> > user is racing themselves. Sadly, because the user may be provoking the
> > unwanted situation we can't put a warn here to attract attention to a
> > probable bug.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c | 4 +---
> >   1 file changed, 1 insertion(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 7e8898d0b78b..2ecefacb1e66 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -3933,9 +3933,7 @@ i915_drop_caches_set(void *data, u64 val)
> >       fs_reclaim_release(GFP_KERNEL);
> >   
> >       if (val & DROP_IDLE) {
> > -             do {
> > -                     flush_delayed_work(&i915->gem.retire_work);
> > -             } while (READ_ONCE(i915->gt.awake));
> > +             flush_delayed_work(&i915->gem.retire_work);
> >               flush_work(&i915->gem.idle_work);
> >       }
> >   
> > 
> 
> What were supposed to be semantics of DROP_IDLE? Now it seems rather 
> weak. Should it for instance also imply DROP_ACTIVE?

All I need for DROP_IDLE is that the idle worker is flushed. I've always
assumed you would pass in DROP_ACTIVE | DROP_RETIRE | DROP_IDLE as the
trifecta.

The biggest problem here is that it is an uninterruptible loop.
-Chris

* Re: [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-02 13:53     ` Chris Wilson
@ 2019-05-02 14:14       ` Tvrtko Ursulin
  2019-05-02 14:21         ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 14:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/05/2019 14:53, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-02 14:48:18)
>>
>> On 01/05/2019 12:45, Chris Wilson wrote:
>>> Tidy up the cleanup sequence by always ensuring that the tasklet is
>>> flushed on parking (before we cleanup). The parking provides a
>>> convenient point to ensure that the backend is truly idle.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
>>>    drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
>>>    2 files changed, 7 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> index 851e62ddcb87..7be54b868d8e 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>> @@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
>>>        return i915_gem_render_state_emit(rq);
>>>    }
>>>    
>>> +static void execlists_park(struct intel_engine_cs *engine)
>>> +{
>>> +     tasklet_kill(&engine->execlists.tasklet);
>>
>> Isn't it actually a problem if tasklet is scheduled and unstarted, or
>> even in progress at the point of engine getting parked?
> 
> That would be a broken driver. :|
> 
> We must be quite sure that engine isn't going to send an interrupt as we
> are just about to drop the wakeref we need to service that interrupt.
> 
> tasklet_kill()
> GEM_BUG_ON(engine->execlists.active);

Or instead of both:

/* Tasklet must not be running or scheduled at this point. */
GEM_BUG_ON(engine->execlists.tasklet.state);

?

Regards,

Tvrtko

* Re: [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches)
  2019-05-02 14:00     ` Chris Wilson
@ 2019-05-02 14:16       ` Tvrtko Ursulin
  0 siblings, 0 replies; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 14:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/05/2019 15:00, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-02 14:34:11)
>>
>> On 01/05/2019 12:45, Chris Wilson wrote:
>>> If the user is racing a call to debugfs/i915_drop_caches with ongoing
>>> submission from another thread/process, we may never end up idling the
>>> GPU and be uninterruptibly spinning in debugfs/i915_drop_caches trying
>>> to catch an idle moment.
>>>
>>> Just flush the work once, that should be enough to park the system under
>>> correct conditions. Outside of those we either have a driver bug or the
>>> user is racing themselves. Sadly, because the user may be provoking the
>>> unwanted situation we can't put a warn here to attract attention to a
>>> probable bug.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>    drivers/gpu/drm/i915/i915_debugfs.c | 4 +---
>>>    1 file changed, 1 insertion(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 7e8898d0b78b..2ecefacb1e66 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -3933,9 +3933,7 @@ i915_drop_caches_set(void *data, u64 val)
>>>        fs_reclaim_release(GFP_KERNEL);
>>>    
>>>        if (val & DROP_IDLE) {
>>> -             do {
>>> -                     flush_delayed_work(&i915->gem.retire_work);
>>> -             } while (READ_ONCE(i915->gt.awake));
>>> +             flush_delayed_work(&i915->gem.retire_work);
>>>                flush_work(&i915->gem.idle_work);
>>>        }
>>>    
>>>
>>
>> What were supposed to be semantics of DROP_IDLE? Now it seems rather
>> weak. Should it for instance also imply DROP_ACTIVE?
> 
> All I need for DROP_IDLE is that the idle worker is flushed. I've always
> assumed you would pass in DROP_ACTIVE | DROP_RETIRE | DROP_IDLE as the
> trifecta.
> 
> The biggest problem here is that it is an uninterruptible loop.

Okay.. let's see if IGT is playing ball. :)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 04/14] drm/i915: Leave engine parking to the engines
  2019-05-01 11:45 ` [PATCH 04/14] drm/i915: Leave engine parking to the engines Chris Wilson
@ 2019-05-02 14:18   ` Tvrtko Ursulin
  0 siblings, 0 replies; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 14:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> Drop the check in GEM parking that the engines were already parked. The
> intention here was that before we dropped the GT wakeref, we were sure
> that no more interrupts could be raised -- however, we have already
> dropped the wakeref by this point and the warning is no longer valid.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_pm.c | 18 +-----------------
>   1 file changed, 1 insertion(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> index 3b6e8d5be8e1..49b0ce594f20 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> @@ -17,24 +17,8 @@ static void i915_gem_park(struct drm_i915_private *i915)
>   
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	for_each_engine(engine, i915, id) {
> -		/*
> -		 * We are committed now to parking the engines, make sure there
> -		 * will be no more interrupts arriving later and the engines
> -		 * are truly idle.
> -		 */
> -		if (wait_for(intel_engine_is_idle(engine), 10)) {
> -			struct drm_printer p = drm_debug_printer(__func__);
> -
> -			dev_err(i915->drm.dev,
> -				"%s is not idle before parking\n",
> -				engine->name);
> -			intel_engine_dump(engine, &p, NULL);
> -		}
> -		tasklet_kill(&engine->execlists.tasklet);
> -
> +	for_each_engine(engine, i915, id)
>   		i915_gem_batch_pool_fini(&engine->batch_pool);
> -	}
>   
>   	i915_timelines_park(i915);
>   	i915_vma_parked(i915);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 06/14] drm/i915: Cancel retire_worker on parking
  2019-05-02 13:33     ` Chris Wilson
@ 2019-05-02 14:20       ` Tvrtko Ursulin
  2019-05-02 14:26         ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 14:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/05/2019 14:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-02 14:29:50)
>>
>> On 01/05/2019 12:45, Chris Wilson wrote:
>>> Replace the racy continuation check within retire_work with a definite
>>> kill-switch on idling. The race was being exposed by gem_concurrent_blit
>>> where the retire_worker would be terminated too early leaving us
>>> spinning in debugfs/i915_drop_caches with nothing flushing the
>>> retirement queue.
>>>
>>> Although that the igt is trying to idle from one child while submitting
>>> from another may be a contributing factor as to why  it runs so slowly...
>>>
>>> Testcase: igt/gem_concurrent_blit
>>> Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
>>>    .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
>>>    2 files changed, 12 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
>>> index ae91ad7cb31e..b239b55f84cd 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
>>> @@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
>>>    {
>>>        struct drm_i915_private *i915 =
>>>                container_of(work, typeof(*i915), gem.idle_work);
>>> +     bool restart = true;
>>>    
>>> +     cancel_delayed_work_sync(&i915->gem.retire_work);
>>>        mutex_lock(&i915->drm.struct_mutex);
>>>    
>>
>> You don't want to run another retire here? Since the retire worker might
>> have just been canceled I thought you should.
> 
> Why though? If there are retires outstanding, we won't sleep and want to
> defer parking until after the next cycle.

In this case what is the point of cancel_delayed_work_*sync* and not 
just the async cancel?

Regards,

Tvrtko

* Re: [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-02 14:14       ` Tvrtko Ursulin
@ 2019-05-02 14:21         ` Chris Wilson
  2019-05-02 14:24           ` Tvrtko Ursulin
  0 siblings, 1 reply; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 14:21 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 15:14:08)
> 
> On 02/05/2019 14:53, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-02 14:48:18)
> >>
> >> On 01/05/2019 12:45, Chris Wilson wrote:
> >>> Tidy up the cleanup sequence by always ensure that the tasklet is
> >>> flushed on parking (before we cleanup). The parking provides a
> >>> convenient point to ensure that the backend is truly idle.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> ---
> >>>    drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
> >>>    drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
> >>>    2 files changed, 7 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> index 851e62ddcb87..7be54b868d8e 100644
> >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> @@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
> >>>        return i915_gem_render_state_emit(rq);
> >>>    }
> >>>    
> >>> +static void execlists_park(struct intel_engine_cs *engine)
> >>> +{
> >>> +     tasklet_kill(&engine->execlists.tasklet);
> >>
> >> Isn't it actually a problem if tasklet is scheduled and unstarted, or
> >> even in progress at the point of engine getting parked?
> > 
> > That would be a broken driver. :|
> > 
> > We must be quite sure that engine isn't going to send an interrupt as we
> > are just about to drop the wakeref we need to service that interrupt.
> > 
> > tasklet_kill()
> > GEM_BUG_ON(engine->execlists.active);
> 
> Or instead of both:
> 
> /* Tasklet must not be running or scheduled at this point. */
> GEM_BUG_ON(engine->execlists.tasklet.state);

There's the dilemma that we start parking based on retirement not
final CS event.
-Chris

* Re: [PATCH 05/14] drm/i915: Remove delay for idle_work
  2019-05-02 13:51       ` Tvrtko Ursulin
@ 2019-05-02 14:23         ` Chris Wilson
  0 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 14:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 14:51:31)
> 
> On 02/05/2019 14:22, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-02 14:19:38)
> >>
> >> On 01/05/2019 12:45, Chris Wilson wrote:
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> >>> index 49b0ce594f20..ae91ad7cb31e 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> >>> @@ -29,12 +29,12 @@ static void i915_gem_park(struct drm_i915_private *i915)
> >>>    static void idle_work_handler(struct work_struct *work)
> >>>    {
> >>>        struct drm_i915_private *i915 =
> >>> -             container_of(work, typeof(*i915), gem.idle_work.work);
> >>> +             container_of(work, typeof(*i915), gem.idle_work);
> >>>    
> >>>        mutex_lock(&i915->drm.struct_mutex);
> >>>    
> >>>        intel_wakeref_lock(&i915->gt.wakeref);
> >>> -     if (!intel_wakeref_active(&i915->gt.wakeref))
> >>> +     if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
> >>
> >> What is the reason for the !work_pending check?
> > 
> > Just that we are going to be called again, so wait until the next time to
> > see if we still need to park.
> 
> When does it get called again? If a whole new cycle of unpark-park 
> happened before the previous park was able to finish?

work_pending() implies that we've done at least one cycle while we
waited for the locks and the work is already queued to be rerun.
-Chris

* Re: [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-02 14:21         ` Chris Wilson
@ 2019-05-02 14:24           ` Tvrtko Ursulin
  2019-05-02 14:33             ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-02 14:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 02/05/2019 15:21, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-02 15:14:08)
>>
>> On 02/05/2019 14:53, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-05-02 14:48:18)
>>>>
>>>> On 01/05/2019 12:45, Chris Wilson wrote:
>>>>> Tidy up the cleanup sequence by always ensure that the tasklet is
>>>>> flushed on parking (before we cleanup). The parking provides a
>>>>> convenient point to ensure that the backend is truly idle.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
>>>>>     drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
>>>>>     2 files changed, 7 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>>> index 851e62ddcb87..7be54b868d8e 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>>>>> @@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
>>>>>         return i915_gem_render_state_emit(rq);
>>>>>     }
>>>>>     
>>>>> +static void execlists_park(struct intel_engine_cs *engine)
>>>>> +{
>>>>> +     tasklet_kill(&engine->execlists.tasklet);
>>>>
>>>> Isn't it actually a problem if tasklet is scheduled and unstarted, or
>>>> even in progress at the point of engine getting parked?
>>>
>>> That would be a broken driver. :|
>>>
>>> We must be quite sure that engine isn't going to send an interrupt as we
>>> are just about to drop the wakeref we need to service that interrupt.
>>>
>>> tasklet_kill()
>>> GEM_BUG_ON(engine->execlists.active);
>>
>> Or instead of both:
>>
>> /* Tasklet must not be running or scheduled at this point. */
>> GEM_BUG_ON(engine->execlists.tasklet.state);
> 
> There's the dilemma that we start parking based on retirement not
> final CS event.

But engine->park() is called once the last engine pm reference is 
dropped. Are we dropping the last reference with a CS event pending?

Regards,

Tvrtko

* Re: [PATCH 06/14] drm/i915: Cancel retire_worker on parking
  2019-05-02 14:20       ` Tvrtko Ursulin
@ 2019-05-02 14:26         ` Chris Wilson
  0 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 14:26 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 15:20:52)
> 
> On 02/05/2019 14:33, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-02 14:29:50)
> >>
> >> On 01/05/2019 12:45, Chris Wilson wrote:
> >>> Replace the racy continuation check within retire_work with a definite
> >>> kill-switch on idling. The race was being exposed by gem_concurrent_blit
> >>> where the retire_worker would be terminated too early leaving us
> >>> spinning in debugfs/i915_drop_caches with nothing flushing the
> >>> retirement queue.
> >>>
> >>> Although that the igt is trying to idle from one child while submitting
> >>> from another may be a contributing factor as to why  it runs so slowly...
> >>>
> >>> Testcase: igt/gem_concurrent_blit
> >>> Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
> >>>    .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
> >>>    2 files changed, 12 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> >>> index ae91ad7cb31e..b239b55f84cd 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> >>> @@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
> >>>    {
> >>>        struct drm_i915_private *i915 =
> >>>                container_of(work, typeof(*i915), gem.idle_work);
> >>> +     bool restart = true;
> >>>    
> >>> +     cancel_delayed_work_sync(&i915->gem.retire_work);
> >>>        mutex_lock(&i915->drm.struct_mutex);
> >>>    
> >>
> >> You don't want to run another retire here? Since the retire worker might
> >> have just been canceled I thought you should.
> > 
> > Why though? If there are retires outstanding, we won't sleep and want to
> > defer parking until after the next cycle.
> 
> In this case what is the point of cancel_delayed_work_*sync* and not 
> just the async cancel?

There's a non-sync version? Ah ha!
-Chris

* [PATCH v2] drm/i915: Cancel retire_worker on parking
  2019-05-01 11:45 ` [PATCH 06/14] drm/i915: Cancel retire_worker on parking Chris Wilson
  2019-05-02 13:29   ` Tvrtko Ursulin
@ 2019-05-02 14:29   ` Chris Wilson
  1 sibling, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 14:29 UTC (permalink / raw)
  To: intel-gfx

Replace the racy continuation check within retire_work with a definite
kill-switch on idling. The race was being exposed by gem_concurrent_blit
where the retire_worker would be terminated too early leaving us
spinning in debugfs/i915_drop_caches with nothing flushing the
retirement queue.

Although the fact that the igt is trying to idle from one child while
submitting from another may be a contributing factor as to why it runs so slowly...

v2: Use the non-sync version of cancel_delayed_work(), we only need to
stop it from being scheduled as we independently check whether now is
the right time to be parking.

Testcase: igt/gem_concurrent_blit
Fixes: 79ffac8599c4 ("drm/i915: Invert the GEM wakeref hierarchy")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_pm.c             | 18 ++++++++++++------
 .../gpu/drm/i915/selftests/mock_gem_device.c   |  1 -
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index ae91ad7cb31e..fa9c2ebd966a 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -30,15 +30,23 @@ static void idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.idle_work);
+	bool restart = true;
 
+	cancel_delayed_work(&i915->gem.retire_work);
 	mutex_lock(&i915->drm.struct_mutex);
 
 	intel_wakeref_lock(&i915->gt.wakeref);
-	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work))
+	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) {
 		i915_gem_park(i915);
+		restart = false;
+	}
 	intel_wakeref_unlock(&i915->gt.wakeref);
 
 	mutex_unlock(&i915->drm.struct_mutex);
+	if (restart)
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
 }
 
 static void retire_work_handler(struct work_struct *work)
@@ -52,10 +60,9 @@ static void retire_work_handler(struct work_struct *work)
 		mutex_unlock(&i915->drm.struct_mutex);
 	}
 
-	if (intel_wakeref_active(&i915->gt.wakeref))
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
 }
 
 static int pm_notifier(struct notifier_block *nb,
@@ -140,7 +147,6 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * Assert that we successfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
 	 */
-	drain_delayed_work(&i915->gem.retire_work);
 	GEM_BUG_ON(i915->gt.awake);
 	flush_work(&i915->gem.idle_work);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index d919f512042c..9fd02025d382 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -58,7 +58,6 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	drain_delayed_work(&i915->gem.retire_work);
 	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
-- 
2.20.1


* Re: [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking
  2019-05-02 14:24           ` Tvrtko Ursulin
@ 2019-05-02 14:33             ` Chris Wilson
  0 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-02 14:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-02 15:24:16)
> 
> On 02/05/2019 15:21, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-02 15:14:08)
> >>
> >> On 02/05/2019 14:53, Chris Wilson wrote:
> >>> Quoting Tvrtko Ursulin (2019-05-02 14:48:18)
> >>>>
> >>>> On 01/05/2019 12:45, Chris Wilson wrote:
> >>>>> Tidy up the cleanup sequence by always ensure that the tasklet is
> >>>>> flushed on parking (before we cleanup). The parking provides a
> >>>>> convenient point to ensure that the backend is truly idle.
> >>>>>
> >>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>>> ---
> >>>>>     drivers/gpu/drm/i915/gt/intel_lrc.c         | 7 ++++++-
> >>>>>     drivers/gpu/drm/i915/intel_guc_submission.c | 1 +
> >>>>>     2 files changed, 7 insertions(+), 1 deletion(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>>> index 851e62ddcb87..7be54b868d8e 100644
> >>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>>>> @@ -2331,6 +2331,11 @@ static int gen8_init_rcs_context(struct i915_request *rq)
> >>>>>         return i915_gem_render_state_emit(rq);
> >>>>>     }
> >>>>>     
> >>>>> +static void execlists_park(struct intel_engine_cs *engine)
> >>>>> +{
> >>>>> +     tasklet_kill(&engine->execlists.tasklet);
> >>>>
> >>>> Isn't it actually a problem if tasklet is scheduled and unstarted, or
> >>>> even in progress at the point of engine getting parked?
> >>>
> >>> That would be a broken driver. :|
> >>>
> >>> We must be quite sure that engine isn't going to send an interrupt as we
> >>> are just about to drop the wakeref we need to service that interrupt.
> >>>
> >>> tasklet_kill()
> >>> GEM_BUG_ON(engine->execlists.active);
> >>
> >> Or instead of both:
> >>
> >> /* Tasklet must not be running or scheduled at this point. */
> >> GEM_BUG_ON(engine->execlists.tasklet.state);
> > 
> > There's the dilemma that we start parking based on retirement not
> > final CS event.
> 
> But engine->park() is called once the last engine pm reference is 
> dropped. Are we dropping the last reference with a CS event pending?

Potentially we are.

i915_request_retire() -> context->exit() -> engine->park()

At no point along that chain do we actually check we have flushed the
backend. The tasklet_kill() would flush if the interrupt had already
been sent, but that's not very strict.

Oh well, you've talked me into re-adding the wait loop here.
-Chris

* ✗ Fi.CI.BAT: failure for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev5)
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (21 preceding siblings ...)
  2019-05-02  9:35 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-05-02 16:45 ` Patchwork
  2019-05-03 10:36 ` [PATCH 01/14] drm/i915/hangcheck: Track context changes Tvrtko Ursulin
  23 siblings, 0 replies; 52+ messages in thread
From: Patchwork @ 2019-05-02 16:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/14] drm/i915/hangcheck: Track context changes (rev5)
URL   : https://patchwork.freedesktop.org/series/60153/
State : failure

== Summary ==

Applying: drm/i915/hangcheck: Track context changes
Applying: drm/i915: Include fence signaled bit in print_request()
Using index info to reconstruct a base tree...
M	drivers/gpu/drm/i915/gt/intel_engine_cs.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/gt/intel_engine_cs.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gt/intel_engine_cs.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0002 drm/i915: Include fence signaled bit in print_request()
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH 01/14] drm/i915/hangcheck: Track context changes
  2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
                   ` (22 preceding siblings ...)
  2019-05-02 16:45 ` ✗ Fi.CI.BAT: failure for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev5) Patchwork
@ 2019-05-03 10:36 ` Tvrtko Ursulin
  2019-05-03 10:43   ` Chris Wilson
  23 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-03 10:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> Given sufficient preemption, we may see a busy system that doesn't
> advance seqno while performing work across multiple contexts, and given
> sufficient pathology not even notice a change in ACTHD. What does change
> between the preempting contexts is their RING, so take note of that and
> treat a change in the ring address as being an indication of forward
> progress.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 +
>   drivers/gpu/drm/i915/gt/intel_hangcheck.c    | 12 +++++++++---
>   2 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 9d64e33f8427..c0ab11b12e14 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,7 @@ struct intel_instdone {
>   
>   struct intel_engine_hangcheck {
>   	u64 acthd;
> +	u32 last_ring;
>   	u32 last_seqno;
>   	u32 next_seqno;
>   	unsigned long action_timestamp;
> diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index e5eaa06fe74d..721ab74a382f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -27,6 +27,7 @@
>   
>   struct hangcheck {
>   	u64 acthd;
> +	u32 ring;
>   	u32 seqno;
>   	enum intel_engine_hangcheck_action action;
>   	unsigned long action_timestamp;
> @@ -134,6 +135,7 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
>   {
>   	hc->acthd = intel_engine_get_active_head(engine);
>   	hc->seqno = intel_engine_get_hangcheck_seqno(engine);
> +	hc->ring = ENGINE_READ(engine, RING_START);
>   }
>   
>   static void hangcheck_store_sample(struct intel_engine_cs *engine,
> @@ -141,18 +143,22 @@ static void hangcheck_store_sample(struct intel_engine_cs *engine,
>   {
>   	engine->hangcheck.acthd = hc->acthd;
>   	engine->hangcheck.last_seqno = hc->seqno;
> +	engine->hangcheck.last_ring = hc->ring;
>   }
>   
>   static enum intel_engine_hangcheck_action
>   hangcheck_get_action(struct intel_engine_cs *engine,
>   		     const struct hangcheck *hc)
>   {
> -	if (engine->hangcheck.last_seqno != hc->seqno)
> -		return ENGINE_ACTIVE_SEQNO;
> -
>   	if (intel_engine_is_idle(engine))
>   		return ENGINE_IDLE;
>   
> +	if (engine->hangcheck.last_ring != hc->ring)
> +		return ENGINE_ACTIVE_SEQNO;
> +
> +	if (engine->hangcheck.last_seqno != hc->seqno)
> +		return ENGINE_ACTIVE_SEQNO;
> +
>   	return engine_stuck(engine, hc->acthd);
>   }
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

This should be associated with the engine seqno removal, right? Not sure if 
it triggers in reality, and so whether it is really needed.

Regards,

Tvrtko

* Re: [PATCH 01/14] drm/i915/hangcheck: Track context changes
  2019-05-03 10:36 ` [PATCH 01/14] drm/i915/hangcheck: Track context changes Tvrtko Ursulin
@ 2019-05-03 10:43   ` Chris Wilson
  0 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-03 10:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-03 11:36:55)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > Given sufficient preemption, we may see a busy system that doesn't
> > advance seqno while performing work across multiple contexts, and given
> > sufficient pathology not even notice a change in ACTHD. What does change
> > between the preempting contexts is their RING, so take note of that and
> > treat a change in the ring address as being an indication of forward
> > progress.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 +
> >   drivers/gpu/drm/i915/gt/intel_hangcheck.c    | 12 +++++++++---
> >   2 files changed, 10 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 9d64e33f8427..c0ab11b12e14 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -53,6 +53,7 @@ struct intel_instdone {
> >   
> >   struct intel_engine_hangcheck {
> >       u64 acthd;
> > +     u32 last_ring;
> >       u32 last_seqno;
> >       u32 next_seqno;
> >       unsigned long action_timestamp;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> > index e5eaa06fe74d..721ab74a382f 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> > @@ -27,6 +27,7 @@
> >   
> >   struct hangcheck {
> >       u64 acthd;
> > +     u32 ring;
> >       u32 seqno;
> >       enum intel_engine_hangcheck_action action;
> >       unsigned long action_timestamp;
> > @@ -134,6 +135,7 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
> >   {
> >       hc->acthd = intel_engine_get_active_head(engine);
> >       hc->seqno = intel_engine_get_hangcheck_seqno(engine);
> > +     hc->ring = ENGINE_READ(engine, RING_START);
> >   }
> >   
> >   static void hangcheck_store_sample(struct intel_engine_cs *engine,
> > @@ -141,18 +143,22 @@ static void hangcheck_store_sample(struct intel_engine_cs *engine,
> >   {
> >       engine->hangcheck.acthd = hc->acthd;
> >       engine->hangcheck.last_seqno = hc->seqno;
> > +     engine->hangcheck.last_ring = hc->ring;
> >   }
> >   
> >   static enum intel_engine_hangcheck_action
> >   hangcheck_get_action(struct intel_engine_cs *engine,
> >                    const struct hangcheck *hc)
> >   {
> > -     if (engine->hangcheck.last_seqno != hc->seqno)
> > -             return ENGINE_ACTIVE_SEQNO;
> > -
> >       if (intel_engine_is_idle(engine))
> >               return ENGINE_IDLE;
> >   
> > +     if (engine->hangcheck.last_ring != hc->ring)
> > +             return ENGINE_ACTIVE_SEQNO;
> > +
> > +     if (engine->hangcheck.last_seqno != hc->seqno)
> > +             return ENGINE_ACTIVE_SEQNO;
> > +
> >       return engine_stuck(engine, hc->acthd);
> >   }
> >   
> > 
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> This should be associated with engine seqno removal, right? Not sure
> whether it ever triggers in reality, so whether it's really needed.

Yeah, I'm not convinced we have a pressing need until timeslicing, as
userspace can only create 1024 preemption events by itself, and that
should be ok... I can imagine userspace submitting a semaphore at
address 0 in each and waiting 1s before submitting the next preemption
event... That would fire the current hangcheck. (Possibly legitimately,
but the blame would not be effective at defeating the hostile client.)

It wasn't until timeslicing that I noticed the effect in practice.
-Chris


* Re: [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible
  2019-05-01 11:45 ` [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
@ 2019-05-03 10:53   ` Tvrtko Ursulin
  2019-05-03 10:58     ` Chris Wilson
  0 siblings, 1 reply; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-03 10:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> If we couple the scheduler more tightly with the execlists policy, we
> can apply the preemption policy to the question of whether we need to
> kick the tasklet at all for this priority bump.
> 
> v2: Rephrase it as a core i915 policy and not an execlists foible.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
>   drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
>   drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
>   drivers/gpu/drm/i915/i915_request.c         |  2 --
>   drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
>   drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
>   drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
>   7 files changed, 39 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index f5b0f27cecb6..06d785533502 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
>   
>   void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
>   
> -static inline bool __execlists_need_preempt(int prio, int last)
> -{
> -	/*
> -	 * Allow preemption of low -> normal -> high, but we do
> -	 * not allow low priority tasks to preempt other low priority
> -	 * tasks under the impression that latency for low priority
> -	 * tasks does not matter (as much as background throughput),
> -	 * so kiss.
> -	 *
> -	 * More naturally we would write
> -	 *	prio >= max(0, last);
> -	 * except that we wish to prevent triggering preemption at the same
> -	 * priority level: the task that is running should remain running
> -	 * to preserve FIFO ordering of dependencies.
> -	 */
> -	return prio > max(I915_PRIORITY_NORMAL - 1, last);
> -}
> -
>   static inline void
>   execlists_set_active(struct intel_engine_execlists *execlists,
>   		     unsigned int bit)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 7be54b868d8e..35aae7b5c6b9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -251,8 +251,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>   	 * ourselves, ignore the request.
>   	 */
>   	last_prio = effective_prio(rq);
> -	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
> -				      last_prio))
> +	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
> +					 last_prio))
>   		return false;
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 84538f69185b..4b042893dc0e 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
>   	GEM_BUG_ON(i915_request_completed(rq));
>   
>   	i915_sw_fence_init(&rq->submit, dummy_notify);
> -	i915_sw_fence_commit(&rq->submit);
> +	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
>   
>   	return rq;
>   }
>   
>   static void dummy_request_free(struct i915_request *dummy)
>   {
> +	/* We have to fake the CS interrupt to kick the next request */
> +	i915_sw_fence_commit(&dummy->submit);
> +
>   	i915_request_mark_complete(dummy);
> +	dma_fence_signal(&dummy->fence);
> +
>   	i915_sched_node_fini(&dummy->sched);
>   	i915_sw_fence_fini(&dummy->submit);
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index af8c9fa5e066..2e22da66a56c 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
>   	if (flags & I915_WAIT_PRIORITY) {
>   		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
>   			gen6_rps_boost(rq);
> -		local_bh_disable(); /* suspend tasklets for reprioritisation */
>   		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
> -		local_bh_enable(); /* kick tasklets en masse */
>   	}
>   
>   	wait.tsk = current;
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 39bc4f54e272..88d18600f5db 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
>   	return engine;
>   }
>   
> -static bool inflight(const struct i915_request *rq,
> -		     const struct intel_engine_cs *engine)
> +static inline int rq_prio(const struct i915_request *rq)
>   {
> -	const struct i915_request *active;
> +	return rq->sched.attr.priority | __NO_PREEMPTION;
> +}
> +
> +static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
> +{
> +	const struct i915_request *inflight =
> +		port_request(engine->execlists.port);
>   
> -	if (!i915_request_is_active(rq))
> +	if (!inflight)
>   		return false;
>   
> -	active = port_request(engine->execlists.port);
> -	return active->hw_context == rq->hw_context;
> +	return i915_scheduler_need_preempt(prio, rq_prio(inflight));
>   }
>   
>   static void __i915_schedule(struct i915_request *rq,
> @@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
>   		 * If we are already the currently executing context, don't
>   		 * bother evaluating if we should preempt ourselves.
>   		 */
> -		if (inflight(node_to_request(node), engine))
> +		if (!kick_tasklet(engine, prio))
>   			continue;

I don't see other callers for kick_tasklet in the series, so I am 
wondering: why not make the function called kick_tasklet actually kick 
the tasklet? ;)

Could call it maybe_kick_tasklet and just end the loop with it.

We could even abstract to the engine like engine->signal_preemption() or 
something, to hide the tasklet from the scheduler. But probably not 
worth it right now.

Regards,

Tvrtko

>   
>   		/* Defer (tasklet) submission until after all of our updates. */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 07d243acf553..7eefccff39bf 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p)
>   		__i915_priolist_free(p);
>   }
>   
> +static inline bool i915_scheduler_need_preempt(int prio, int active)
> +{
> +	/*
> +	 * Allow preemption of low -> normal -> high, but we do
> +	 * not allow low priority tasks to preempt other low priority
> +	 * tasks under the impression that latency for low priority
> +	 * tasks does not matter (as much as background throughput),
> +	 * so kiss.
> +	 *
> +	 * More naturally we would write
> +	 *	prio >= max(0, last);
> +	 * except that we wish to prevent triggering preemption at the same
> +	 * priority level: the task that is running should remain running
> +	 * to preserve FIFO ordering of dependencies.
> +	 */
> +	return prio > max(I915_PRIORITY_NORMAL - 1, active);
> +}
> +
>   #endif /* _I915_SCHEDULER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index ed94001028f2..cb9964ae229d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -746,7 +746,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   				&engine->i915->guc.preempt_work[engine->id];
>   			int prio = execlists->queue_priority_hint;
>   
> -			if (__execlists_need_preempt(prio, port_prio(port))) {
> +			if (i915_scheduler_need_preempt(prio,
> +							port_prio(port))) {
>   				execlists_set_active(execlists,
>   						     EXECLISTS_ACTIVE_PREEMPT);
>   				queue_work(engine->i915->guc.preempt_wq,
> 


* Re: [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible
  2019-05-03 10:53   ` Tvrtko Ursulin
@ 2019-05-03 10:58     ` Chris Wilson
  0 siblings, 0 replies; 52+ messages in thread
From: Chris Wilson @ 2019-05-03 10:58 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-05-03 11:53:31)
> 
> On 01/05/2019 12:45, Chris Wilson wrote:
> > If we couple the scheduler more tightly with the execlists policy, we
> > can apply the preemption policy to the question of whether we need to
> > kick the tasklet at all for this priority bump.
> > 
> > v2: Rephrase it as a core i915 policy and not an execlists foible.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
> >   drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
> >   drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
> >   drivers/gpu/drm/i915/i915_request.c         |  2 --
> >   drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
> >   drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
> >   drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
> >   7 files changed, 39 insertions(+), 31 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> > index f5b0f27cecb6..06d785533502 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > @@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
> >   
> >   void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
> >   
> > -static inline bool __execlists_need_preempt(int prio, int last)
> > -{
> > -     /*
> > -      * Allow preemption of low -> normal -> high, but we do
> > -      * not allow low priority tasks to preempt other low priority
> > -      * tasks under the impression that latency for low priority
> > -      * tasks does not matter (as much as background throughput),
> > -      * so kiss.
> > -      *
> > -      * More naturally we would write
> > -      *      prio >= max(0, last);
> > -      * except that we wish to prevent triggering preemption at the same
> > -      * priority level: the task that is running should remain running
> > -      * to preserve FIFO ordering of dependencies.
> > -      */
> > -     return prio > max(I915_PRIORITY_NORMAL - 1, last);
> > -}
> > -
> >   static inline void
> >   execlists_set_active(struct intel_engine_execlists *execlists,
> >                    unsigned int bit)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 7be54b868d8e..35aae7b5c6b9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -251,8 +251,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> >        * ourselves, ignore the request.
> >        */
> >       last_prio = effective_prio(rq);
> > -     if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
> > -                                   last_prio))
> > +     if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
> > +                                      last_prio))
> >               return false;
> >   
> >       /*
> > diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > index 84538f69185b..4b042893dc0e 100644
> > --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> > @@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
> >       GEM_BUG_ON(i915_request_completed(rq));
> >   
> >       i915_sw_fence_init(&rq->submit, dummy_notify);
> > -     i915_sw_fence_commit(&rq->submit);
> > +     set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> >   
> >       return rq;
> >   }
> >   
> >   static void dummy_request_free(struct i915_request *dummy)
> >   {
> > +     /* We have to fake the CS interrupt to kick the next request */
> > +     i915_sw_fence_commit(&dummy->submit);
> > +
> >       i915_request_mark_complete(dummy);
> > +     dma_fence_signal(&dummy->fence);
> > +
> >       i915_sched_node_fini(&dummy->sched);
> >       i915_sw_fence_fini(&dummy->submit);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index af8c9fa5e066..2e22da66a56c 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
> >       if (flags & I915_WAIT_PRIORITY) {
> >               if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
> >                       gen6_rps_boost(rq);
> > -             local_bh_disable(); /* suspend tasklets for reprioritisation */
> >               i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
> > -             local_bh_enable(); /* kick tasklets en masse */
> >       }
> >   
> >       wait.tsk = current;
> > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> > index 39bc4f54e272..88d18600f5db 100644
> > --- a/drivers/gpu/drm/i915/i915_scheduler.c
> > +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> > @@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
> >       return engine;
> >   }
> >   
> > -static bool inflight(const struct i915_request *rq,
> > -                  const struct intel_engine_cs *engine)
> > +static inline int rq_prio(const struct i915_request *rq)
> >   {
> > -     const struct i915_request *active;
> > +     return rq->sched.attr.priority | __NO_PREEMPTION;
> > +}
> > +
> > +static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
> > +{
> > +     const struct i915_request *inflight =
> > +             port_request(engine->execlists.port);
> >   
> > -     if (!i915_request_is_active(rq))
> > +     if (!inflight)
> >               return false;
> >   
> > -     active = port_request(engine->execlists.port);
> > -     return active->hw_context == rq->hw_context;
> > +     return i915_scheduler_need_preempt(prio, rq_prio(inflight));
> >   }
> >   
> >   static void __i915_schedule(struct i915_request *rq,
> > @@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
> >                * If we are already the currently executing context, don't
> >                * bother evaluating if we should preempt ourselves.
> >                */
> > -             if (inflight(node_to_request(node), engine))
> > +             if (!kick_tasklet(engine, prio))
> >                       continue;
> 
> I don't see other callers for kick_tasklet in the series, so I am 
> wondering: why not make the function called kick_tasklet actually kick 
> the tasklet? ;)

You don't see the ghostly presence of set_preemption_timeout() here? Ok,
that could be pushed into the function as well.

> Could call it maybe_kick_tasklet and just end the loop with it.
> 
> We could even abstract to the engine like engine->signal_preemption() or 
> something, to hide the tasklet from the scheduler. But probably not 
> worth it right now.

Haven't quite figured out the best approach. I've done both so far for
preemption-timeout; at the moment I've pulled the hrtimer into
i915_scheduler.c (frankly because it was easier to do with the old
rebasing), but the dust is nowhere close to settled.
-Chris


* Re: [PATCH 09/14] drm/i915: Delay semaphore submission until the start of the signaler
  2019-05-01 11:45 ` [PATCH 09/14] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
@ 2019-05-03 11:03   ` Tvrtko Ursulin
  0 siblings, 0 replies; 52+ messages in thread
From: Tvrtko Ursulin @ 2019-05-03 11:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 01/05/2019 12:45, Chris Wilson wrote:
> Currently we submit the semaphore busywait as soon as the signaler is
> submitted to HW. However, we may submit the signaler as the tail of a
> batch of requests, and even not as the first context in the HW list,
> i.e. the busywait may start spinning far in advance of the signaler even
> starting.
> 
> If we wait until the request before the signaler is completed before
> submitting the busywait, we prevent the busywait from starting too
> early, if the signaler is not first in submission port.
> 
> To handle the case where the signaler is at the start of the second (or
> later) submission port, we will need to delay the execution callback
> until we know the context is promoted to port0. A challenge for later.
> 
> Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 19 +++++++++++++++++++
>   1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2e22da66a56c..8cb3ed5531e3 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -770,6 +770,21 @@ i915_request_create(struct intel_context *ce)
>   	return rq;
>   }
>   
> +static int
> +i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
> +{
> +	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
> +		return 0;
> +
> +	signal = list_prev_entry(signal, ring_link);
> +	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
> +		return 0;
> +
> +	return i915_sw_fence_await_dma_fence(&rq->submit,
> +					     &signal->fence, 0,
> +					     I915_FENCE_GFP);
> +}
> +
>   static int
>   emit_semaphore_wait(struct i915_request *to,
>   		    struct i915_request *from,
> @@ -788,6 +803,10 @@ emit_semaphore_wait(struct i915_request *to,
>   						     &from->fence, 0,
>   						     I915_FENCE_GFP);
>   
> +	err = i915_request_await_start(to, from);
> +	if (err < 0)
> +		return err;
> +
>   	err = i915_sw_fence_await_dma_fence(&to->semaphore,
>   					    &from->fence, 0,
>   					    I915_FENCE_GFP);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



Thread overview: 52+ messages
2019-05-01 11:45 [PATCH 01/14] drm/i915/hangcheck: Track context changes Chris Wilson
2019-05-01 11:45 ` [PATCH 02/14] drm/i915: Include fence signaled bit in print_request() Chris Wilson
2019-05-02 13:43   ` Tvrtko Ursulin
2019-05-01 11:45 ` [PATCH 03/14] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
2019-05-02 13:48   ` Tvrtko Ursulin
2019-05-02 13:53     ` Chris Wilson
2019-05-02 14:14       ` Tvrtko Ursulin
2019-05-02 14:21         ` Chris Wilson
2019-05-02 14:24           ` Tvrtko Ursulin
2019-05-02 14:33             ` Chris Wilson
2019-05-01 11:45 ` [PATCH 04/14] drm/i915: Leave engine parking to the engines Chris Wilson
2019-05-02 14:18   ` Tvrtko Ursulin
2019-05-01 11:45 ` [PATCH 05/14] drm/i915: Remove delay for idle_work Chris Wilson
2019-05-02 13:19   ` Tvrtko Ursulin
2019-05-02 13:22     ` Chris Wilson
2019-05-02 13:51       ` Tvrtko Ursulin
2019-05-02 14:23         ` Chris Wilson
2019-05-01 11:45 ` [PATCH 06/14] drm/i915: Cancel retire_worker on parking Chris Wilson
2019-05-02 13:29   ` Tvrtko Ursulin
2019-05-02 13:33     ` Chris Wilson
2019-05-02 14:20       ` Tvrtko Ursulin
2019-05-02 14:26         ` Chris Wilson
2019-05-02 14:29   ` [PATCH v2] " Chris Wilson
2019-05-01 11:45 ` [PATCH 07/14] drm/i915: Stop spinning for DROP_IDLE (debugfs/i915_drop_caches) Chris Wilson
2019-05-02 13:34   ` Tvrtko Ursulin
2019-05-02 14:00     ` Chris Wilson
2019-05-02 14:16       ` Tvrtko Ursulin
2019-05-01 11:45 ` [PATCH 08/14] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
2019-05-03 10:53   ` Tvrtko Ursulin
2019-05-03 10:58     ` Chris Wilson
2019-05-01 11:45 ` [PATCH 09/14] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
2019-05-03 11:03   ` Tvrtko Ursulin
2019-05-01 11:45 ` [PATCH 10/14] drm/i915/execlists: Don't apply priority boost for resets Chris Wilson
2019-05-01 11:45 ` [PATCH 11/14] drm/i915: Rearrange i915_scheduler.c Chris Wilson
2019-05-01 11:45 ` [PATCH 12/14] drm/i915: Pass i915_sched_node around internally Chris Wilson
2019-05-01 11:45 ` [PATCH 13/14] drm/i915: Bump signaler priority on adding a waiter Chris Wilson
2019-05-01 14:59   ` [PATCH] " Chris Wilson
2019-05-01 16:29   ` [PATCH v2] " Chris Wilson
2019-05-01 16:32   ` Chris Wilson
2019-05-01 11:45 ` [PATCH 14/14] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
2019-05-01 12:29 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes Patchwork
2019-05-01 12:47 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-05-01 15:22 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev2) Patchwork
2019-05-01 15:35 ` ✓ Fi.CI.BAT: success " Patchwork
2019-05-01 16:42 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev3) Patchwork
2019-05-01 16:52 ` ✓ Fi.CI.BAT: success " Patchwork
2019-05-01 17:01 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev4) Patchwork
2019-05-01 17:37 ` ✓ Fi.CI.BAT: success " Patchwork
2019-05-02  9:35 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-05-02 16:45 ` ✗ Fi.CI.BAT: failure for series starting with [01/14] drm/i915/hangcheck: Track context changes (rev5) Patchwork
2019-05-03 10:36 ` [PATCH 01/14] drm/i915/hangcheck: Track context changes Tvrtko Ursulin
2019-05-03 10:43   ` Chris Wilson
