All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: intel-gfx@lists.freedesktop.org
Cc: miku@iki.fi
Subject: [PATCH 3/4] drm/i915: Use hangcheck score to find guilty context
Date: Wed, 29 Jan 2014 17:05:38 +0200	[thread overview]
Message-ID: <1391007939-5741-4-git-send-email-mika.kuoppala@intel.com> (raw)
In-Reply-To: <1391007939-5741-1-git-send-email-mika.kuoppala@intel.com>

With full ppgtt using acthd is not enough to find guilty
batch buffer. We get multiple false positives as acthd is
per vm.

Instead of scanning which vm was running on a ring,
to find corressponding context, use a different, simpler,
strategy of finding batches that caused gpu hang:

If hangcheck has declared ring to be hung, find first non complete
request on that ring and claim it was guilty.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73652
Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         |   42 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_irq.c         |    3 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h |    2 ++
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8f9ac0a..a46a1a7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2331,21 +2331,19 @@ static bool i915_context_is_banned(const struct i915_hw_context *ctx)
 
 static void i915_set_reset_status(struct intel_ring_buffer *ring,
 				  struct drm_i915_gem_request *request,
-				  u32 acthd)
+				  const bool guilty)
 {
-	bool inside, guilty;
+	const u32 acthd = intel_ring_get_active_head(ring);
+	bool inside;
 	unsigned long offset = 0;
 	struct i915_hw_context *ctx = request->ctx;
 	struct i915_ctx_hang_stats *hs;
 
-	/* Innocent until proven guilty */
-	guilty = false;
-
 	if (request->batch_obj)
 		offset = i915_gem_obj_offset(request->batch_obj,
 					     request_to_vm(request));
 
-	if (ring->hangcheck.action != HANGCHECK_WAIT &&
+	if (guilty &&
 	    i915_request_guilty(request, acthd, &inside)) {
 		DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
 			  ring->name,
@@ -2353,8 +2351,6 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
 			  offset,
 			  ctx ? ctx->id : 0,
 			  acthd);
-
-		guilty = true;
 	}
 
 	if (WARN_ON(!ctx))
@@ -2382,19 +2378,39 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request)
 	kfree(request);
 }
 
-static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
-				       struct intel_ring_buffer *ring)
+static struct drm_i915_gem_request *
+i915_gem_find_first_non_complete(struct intel_ring_buffer *ring)
 {
-	u32 completed_seqno = ring->get_seqno(ring, false);
-	u32 acthd = intel_ring_get_active_head(ring);
 	struct drm_i915_gem_request *request;
+	const u32 completed_seqno = ring->get_seqno(ring, false);
 
 	list_for_each_entry(request, &ring->request_list, list) {
 		if (i915_seqno_passed(completed_seqno, request->seqno))
 			continue;
 
-		i915_set_reset_status(ring, request, acthd);
+		return request;
 	}
+
+	return NULL;
+}
+
+static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
+				       struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_request *request;
+	bool ring_hung;
+
+	request = i915_gem_find_first_non_complete(ring);
+
+	if (request == NULL)
+		return;
+
+	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
+
+	i915_set_reset_status(ring, request, ring_hung);
+
+	list_for_each_entry_continue(request, &ring->request_list, list)
+		i915_set_reset_status(ring, request, false);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 72ade87..e4eb1988 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2493,7 +2493,6 @@ static void i915_hangcheck_elapsed(unsigned long data)
 #define BUSY 1
 #define KICK 5
 #define HUNG 20
-#define FIRE 30
 
 	if (!i915.enable_hangcheck)
 		return;
@@ -2577,7 +2576,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
 	}
 
 	for_each_ring(ring, dev_priv, i) {
-		if (ring->hangcheck.score > FIRE) {
+		if (ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
 			DRM_INFO("%s on %s\n",
 				 stuck[i] ? "stuck" : "no progress",
 				 ring->name);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 71a73f4..38c757e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -41,6 +41,8 @@ enum intel_ring_hangcheck_action {
 	HANGCHECK_HUNG,
 };
 
+#define HANGCHECK_SCORE_RING_HUNG 31
+
 struct intel_ring_hangcheck {
 	bool deadlock;
 	u32 seqno;
-- 
1.7.9.5

  parent reply	other threads:[~2014-01-29 15:05 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-29 15:05 [PATCH 0/4] context reset stats fixes for ppgtt Mika Kuoppala
2014-01-29 15:05 ` [PATCH 1/4] drm/i915: Use i915_hw_context to set reset stats Mika Kuoppala
2014-01-29 20:38   ` Ben Widawsky
2014-01-30 14:01   ` [PATCH v2 " Mika Kuoppala
2014-01-30 15:26     ` Daniel Vetter
2014-01-29 15:05 ` [PATCH 2/4] drm/i915: Tune down debug output when context is banned Mika Kuoppala
2014-01-29 20:47   ` Ben Widawsky
2014-01-29 20:54     ` Daniel Vetter
2014-01-30 14:05   ` [PATCH v3 " Mika Kuoppala
2014-01-29 15:05 ` Mika Kuoppala [this message]
2014-01-29 15:05 ` [PATCH 4/4] drm/i915: Get rid of acthd based guilty batch search Mika Kuoppala
2014-01-29 21:44   ` Ben Widawsky
2014-01-30 12:59     ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1391007939-5741-4-git-send-email-mika.kuoppala@intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=miku@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.