All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v3 14/16] drm/i915: refuse to submit more batchbuffers from guilty context
Date: Thu,  4 Apr 2013 18:32:46 +0300	[thread overview]
Message-ID: <1365089568-20457-15-git-send-email-mika.kuoppala@intel.com> (raw)
In-Reply-To: <1365089568-20457-1-git-send-email-mika.kuoppala@intel.com>

If a context has recently submitted a faulty batchbuffer that was found
guilty of a GPU hang and keeps submitting more crap, ban it permanently.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |   23 ++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h            |    7 +++++++
 drivers/gpu/drm/i915/i915_gem.c            |    7 +++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   13 +++++++++++++
 4 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a5b8aa9..0928f11 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -852,6 +852,8 @@ int intel_gpu_reset(struct drm_device *dev)
 int i915_reset(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_ctx_hang_stats *hs;
+	bool do_wedge = true;
 	int ret;
 
 	if (!i915_try_reset)
@@ -859,10 +861,29 @@ int i915_reset(struct drm_device *dev)
 
 	mutex_lock(&dev->struct_mutex);
 
+	/* i915_gem_reset will set this if it finds a guilty context */
+	dev_priv->gpu_error.hang_stats = NULL;
+
 	i915_gem_reset(dev);
 
+	hs = dev_priv->gpu_error.hang_stats;
+
+	if (hs) {
+		if (hs->batch_active == 1) {
+			do_wedge = false;
+		} else if (!hs->banned &&
+			   get_seconds() - hs->batch_active_reset_ts < 5) {
+			hs->banned = true;
+			do_wedge = false;
+		}
+
+		hs->batch_active_reset_ts = get_seconds();
+	}
+
+	dev_priv->gpu_error.hang_stats = NULL;
+
 	ret = -ENODEV;
-	if (get_seconds() - dev_priv->gpu_error.last_reset < 5)
+	if (do_wedge && get_seconds() - dev_priv->gpu_error.last_reset < 5)
 		DRM_ERROR("GPU hanging too fast, declaring wedged!\n");
 	else
 		ret = intel_gpu_reset(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8223908..30ba79c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -458,6 +458,12 @@ struct i915_ctx_hang_stats {
 
 	/* This context had batch active when hang was declared */
 	unsigned batch_active;
+
+	/* Time when this context was last blamed for a GPU reset */
+	unsigned long batch_active_reset_ts;
+
+	/* This context is banned from submitting more work */
+	bool banned;
 };
 
 /* This must match up with the value previously used for execbuf2.rsvd1. */
@@ -831,6 +837,7 @@ struct i915_gpu_error {
 	struct work_struct work;
 
 	unsigned long last_reset;
+	struct i915_ctx_hang_stats *hang_stats;
 
 	/**
 	 * State variable and reset counter controlling the reset flow
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 475b6ad..ca5c9c3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2147,6 +2147,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
 				  struct drm_i915_gem_request *request,
 				  u32 acthd)
 {
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct i915_ctx_hang_stats *hs = NULL;
 	bool inside, guilty;
 
@@ -2175,10 +2176,12 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
 		hs = &request->file_priv->hang_stats;
 
 	if (hs) {
-		if (guilty)
+		if (guilty) {
 			hs->batch_active++;
-		else
+			dev_priv->gpu_error.hang_stats = hs;
+		} else {
 			hs->batch_pending++;
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bd1750a..f1b1ea9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -844,6 +844,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct drm_clip_rect *cliprects = NULL;
 	struct intel_ring_buffer *ring;
 	struct i915_hw_context *ctx;
+	struct i915_ctx_hang_stats *hs;
 	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 exec_start, exec_len;
 	u32 mask, flags;
@@ -1026,6 +1027,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
+	hs = i915_gem_context_get_hang_stats(&dev_priv->ring[RCS],
+					     file, ctx_id);
+	if (IS_ERR(hs)) {
+		ret = PTR_ERR(hs);
+		goto err;
+	}
+
+	if (hs->banned) {
+		ret = -EIO;
+		goto err;
+	}
+
 	ctx = i915_switch_context(ring, file, ctx_id);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
-- 
1.7.9.5

  parent reply	other threads:[~2013-04-04 15:28 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-04 15:32 [PATCH v3 00/16] arb robustness enablers v3 Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 01/16] drm/i915: return context from i915_switch_context() Mika Kuoppala
2013-04-20 18:10   ` Ben Widawsky
2013-04-04 15:32 ` [PATCH v3 02/16] drm/i915: cleanup i915_add_request Mika Kuoppala
2013-04-20 18:36   ` Ben Widawsky
2013-04-04 15:32 ` [PATCH v3 03/16] drm/i915: reference count for i915_hw_contexts Mika Kuoppala
2013-04-09 22:18   ` Chris Wilson
2013-04-10  0:12     ` [PATCH] " Ben Widawsky
2013-04-20 18:11       ` Ben Widawsky
2013-04-04 15:32 ` [PATCH v3 04/16] drm/i915: pass seqno to i915_hangcheck_ring_idle Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 05/16] drm/i915: track ring progression using seqnos Mika Kuoppala
2013-04-20 18:43   ` Ben Widawsky
2013-04-21 21:07     ` Ben Widawsky
2013-04-22 13:36       ` Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 06/16] drm/i915: introduce i915_hangcheck_ring_hung Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 07/16] drm/i915: detect hang using per ring hangcheck_score Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 08/16] drm/i915: remove i915_hangcheck_hung Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 09/16] drm/i915: add struct i915_ctx_hang_stats Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 10/16] drm/i915: add i915_gem_context_get_hang_stats() Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 11/16] drm/i915: add batch object and context to i915_add_request() Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 12/16] drm/i915: mark rings which were waiting when hang happened Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 13/16] drm/i915: find guilty batch buffer on ring resets Mika Kuoppala
2013-04-04 15:32 ` Mika Kuoppala [this message]
2013-04-11 15:13   ` [PATCH v3 14/16] drm/i915: refuse to submit more batchbuffers from guilty context Mika Kuoppala
2013-04-16 11:32   ` [PATCH " Mika Kuoppala
2013-04-16 13:59     ` Chris Wilson
2013-04-17 10:11       ` [PATCH v3 " Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 15/16] drm/i915: add i915_reset_count Mika Kuoppala
2013-04-04 15:32 ` [PATCH v3 16/16] drm/i915: add i915_get_reset_stats_ioctl Mika Kuoppala
2013-04-24 23:27 ` [PATCH v3 00/16] arb robustness enablers v3 Ben Widawsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1365089568-20457-15-git-send-email-mika.kuoppala@intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.