All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Subject: [PATCH 4/6] drm/i915: Harden detection of missed interrupts
Date: Fri, 15 Jan 2016 14:35:43 +0000	[thread overview]
Message-ID: <1452868545-19586-5-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1452868545-19586-1-git-send-email-chris@chris-wilson.co.uk>

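Only declare a missed interrupt if we find that the GPU is idle with
waiters still pending and a full hangcheck interval has passed in which
no new user interrupts have been raised. To detect this, count user
interrupts per engine in notify_ring() and have hangcheck compare the
current count against the value it sampled on its previous pass.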

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 11 +++++++----
 drivers/gpu/drm/i915/i915_irq.c         |  7 ++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b421b53ca128..966fc022418c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -730,10 +730,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 static void i915_ring_seqno_info(struct seq_file *m,
 				 struct intel_engine_cs *ring)
 {
-	if (ring->get_seqno) {
-		seq_printf(m, "Current sequence (%s): %x\n",
-			   ring->name, ring->get_seqno(ring));
-	}
+	seq_printf(m, "Current sequence (%s): %x\n",
+		   ring->name, ring->get_seqno(ring));
+	seq_printf(m, "Current user interrupts (%s): %x\n",
+		   ring->name, READ_ONCE(ring->user_interrupts));
 }
 
 static int i915_gem_seqno_info(struct seq_file *m, void *data)
@@ -1361,6 +1361,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_printf(m, "%s:\n", ring->name);
 		seq_printf(m, "\tseqno = %x [current %x]\n",
 			   ring->hangcheck.seqno, seqno[i]);
+		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+			   ring->hangcheck.user_interrupts,
+			   ring->user_interrupts);
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)ring->hangcheck.acthd,
 			   (long long)acthd[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 07bc2cdd6252..978eebcf4594 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
 		return;
 
 	trace_i915_gem_request_notify(ring);
+	ring->user_interrupts++;
 
 	wake_up_all(&ring->irq_queue);
 }
@@ -3097,6 +3098,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	for_each_ring(ring, dev_priv, i) {
 		u64 acthd;
 		u32 seqno;
+		unsigned user_interrupts;
 		bool busy = true;
 
 		semaphore_clear_deadlocks(dev_priv);
@@ -3113,6 +3115,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 		acthd = intel_ring_get_active_head(ring);
 		seqno = ring->get_seqno(ring);
+		user_interrupts = READ_ONCE(ring->user_interrupts);
 
 		if (ring->hangcheck.seqno == seqno) {
 			if (ring_idle(ring, seqno)) {
@@ -3120,7 +3123,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 				if (waitqueue_active(&ring->irq_queue)) {
 					/* Issue a wake-up to catch stuck h/w. */
-					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
+					if (ring->hangcheck.user_interrupts == user_interrupts &&
+					    !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
 						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
 							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
 								  ring->name);
@@ -3187,6 +3191,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 		ring->hangcheck.seqno = seqno;
 		ring->hangcheck.acthd = acthd;
+		ring->hangcheck.user_interrupts = user_interrupts;
 		busy_count += busy;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8fb02b21e75d..b22573561669 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
 	u64 acthd;
 	u64 max_acthd;
 	u32 seqno;
+	unsigned user_interrupts;
 	int score;
 	enum intel_ring_hangcheck_action action;
 	int deadlock;
@@ -301,6 +302,7 @@ struct  intel_engine_cs {
 	 * inspecting request list.
 	 */
 	u32 last_submitted_seqno;
+	unsigned user_interrupts;
 
 	bool gpu_caches_dirty;
 
-- 
2.7.0.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-01-15 14:36 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-15 14:35 "missed-interrupt" syndrome on Broadwell+ Chris Wilson
2016-01-15 14:35 ` [PATCH 1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Chris Wilson
2016-01-15 17:55   ` Mika Kuoppala
2016-01-15 14:35 ` [PATCH 2/6] drm/i915: Separate out the seqno-barrier from engine->get_seqno Chris Wilson
2016-01-15 14:35 ` [PATCH 3/6] drm/i915: Broadwell execlists needs exactly the same seqno w/a as legacy Chris Wilson
2016-01-15 14:35 ` Chris Wilson [this message]
2016-01-15 14:35 ` [PATCH 5/6] drm/i915: Use simplest form for flushing the single cacheline in the HWS Chris Wilson
2016-01-15 14:35 ` [PATCH 6/6] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor Chris Wilson
2016-01-15 15:20 ` ✗ Fi.CI.BAT: warning for series starting with [1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Patchwork
2016-01-16  9:46 ` [PATCH v2 1/6] " Chris Wilson
2016-01-16  9:46   ` [PATCH v2 2/6] drm/i915: Separate out the seqno-barrier from engine->get_seqno Chris Wilson
2016-01-16  9:46   ` [PATCH v2 3/6] drm/i915: Use ordered seqno write interrupt generation on gen8+ execlists Chris Wilson
2016-01-18  8:58     ` [PATCH] magic-clflush-fix Chris Wilson
2016-01-18  9:02       ` Chris Wilson
2016-01-18  9:02     ` [PATCH] drm/i915: Use ordered seqno write interrupt generation on gen8+ execlists Chris Wilson
2016-01-16  9:46   ` [PATCH v2 4/6] drm/i915: Harden detection of missed interrupts Chris Wilson
2016-01-18 13:07     ` Mika Kuoppala
2016-01-18 15:35       ` Chris Wilson
2016-01-16  9:46   ` [PATCH v2 5/6] drm/i915: Use simplest form for flushing the single cacheline in the HWS Chris Wilson
2016-01-16  9:46   ` [PATCH v2 6/6] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor Chris Wilson
2016-01-16 10:01 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ (rev6) Patchwork
2016-01-18  9:30 ` ✗ Fi.CI.BAT: failure for series starting with [1/6] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ (rev8) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1452868545-19586-5-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.