All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: mika.kuoppala@intel.com
Subject: [PATCH 6/7] drm/i915: Serialise resets with wedging
Date: Wed,  6 Feb 2019 12:04:31 +0000	[thread overview]
Message-ID: <20190206120432.9452-6-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20190206120432.9452-1-chris@chris-wilson.co.uk>

Prevent concurrent set-wedge with ongoing resets (and vice versa) by
taking the same wedge_mutex around both operations.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reset.c | 68 ++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 1d3bec0ff990..b629f25a81f0 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -793,17 +793,14 @@ static void nop_submit_request(struct i915_request *request)
 	intel_engine_queue_breadcrumbs(engine);
 }
 
-void i915_gem_set_wedged(struct drm_i915_private *i915)
+static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	mutex_lock(&error->wedge_mutex);
-	if (test_bit(I915_WEDGED, &error->flags)) {
-		mutex_unlock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags))
 		return;
-	}
 
 	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -852,12 +849,18 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 	set_bit(I915_WEDGED, &error->flags);
 
 	GEM_TRACE("end\n");
-	mutex_unlock(&error->wedge_mutex);
+}
 
-	wake_up_all(&error->reset_queue);
+void i915_gem_set_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+
+	mutex_lock(&error->wedge_mutex);
+	__i915_gem_set_wedged(i915);
+	mutex_unlock(&error->wedge_mutex);
 }
 
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
@@ -868,8 +871,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
 		return false;
 
-	mutex_lock(&error->wedge_mutex);
-
 	GEM_TRACE("start\n");
 
 	/*
@@ -920,11 +921,21 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
 
-	mutex_unlock(&i915->gpu_error.wedge_mutex);
-
 	return true;
 }
 
+bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	bool result;
+
+	mutex_lock(&error->wedge_mutex);
+	result = __i915_gem_unset_wedged(i915);
+	mutex_unlock(&error->wedge_mutex);
+
+	return result;
+}
+
 static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
 	int err, i;
@@ -974,7 +985,7 @@ void i915_reset(struct drm_i915_private *i915,
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	if (!i915_gem_unset_wedged(i915))
+	if (!__i915_gem_unset_wedged(i915))
 		return;
 
 	if (reason)
@@ -1036,7 +1047,7 @@ void i915_reset(struct drm_i915_private *i915,
 	 */
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 error:
-	i915_gem_set_wedged(i915);
+	__i915_gem_set_wedged(i915);
 	goto finish;
 }
 
@@ -1128,7 +1139,9 @@ static void i915_reset_device(struct drm_i915_private *i915,
 	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
 		intel_prepare_reset(i915);
 
+		mutex_lock(&error->wedge_mutex);
 		i915_reset(i915, engine_mask, reason);
+		mutex_unlock(&error->wedge_mutex);
 
 		intel_finish_reset(i915);
 	}
@@ -1196,6 +1209,7 @@ void i915_handle_error(struct drm_i915_private *i915,
 		       unsigned long flags,
 		       const char *fmt, ...)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
 	unsigned int tmp;
@@ -1232,20 +1246,19 @@ void i915_handle_error(struct drm_i915_private *i915,
 	 * Try engine reset when available. We fall back to full reset if
 	 * single reset fails.
 	 */
-	if (intel_has_reset_engine(i915) &&
-	    !i915_terminally_wedged(&i915->gpu_error)) {
+	if (intel_has_reset_engine(i915) && !i915_terminally_wedged(error)) {
 		for_each_engine_masked(engine, i915, engine_mask, tmp) {
 			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
 			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					     &i915->gpu_error.flags))
+					     &error->flags))
 				continue;
 
 			if (i915_reset_engine(engine, msg) == 0)
 				engine_mask &= ~intel_engine_flag(engine);
 
 			clear_bit(I915_RESET_ENGINE + engine->id,
-				  &i915->gpu_error.flags);
-			wake_up_bit(&i915->gpu_error.flags,
+				  &error->flags);
+			wake_up_bit(&error->flags,
 				    I915_RESET_ENGINE + engine->id);
 		}
 	}
@@ -1254,10 +1267,9 @@ void i915_handle_error(struct drm_i915_private *i915,
 		goto out;
 
 	/* Full reset needs the mutex, stop any other user trying to do so. */
-	if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
-		wait_event(i915->gpu_error.reset_queue,
-			   !test_bit(I915_RESET_BACKOFF,
-				     &i915->gpu_error.flags));
+	if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
+		wait_event(error->reset_queue,
+			   !test_bit(I915_RESET_BACKOFF, &error->flags));
 		goto out; /* piggy-back on the other reset */
 	}
 
@@ -1267,8 +1279,8 @@ void i915_handle_error(struct drm_i915_private *i915,
 	/* Prevent any other reset-engine attempt. */
 	for_each_engine(engine, i915, tmp) {
 		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
-					&i915->gpu_error.flags))
-			wait_on_bit(&i915->gpu_error.flags,
+					&error->flags))
+			wait_on_bit(&error->flags,
 				    I915_RESET_ENGINE + engine->id,
 				    TASK_UNINTERRUPTIBLE);
 	}
@@ -1277,11 +1289,11 @@ void i915_handle_error(struct drm_i915_private *i915,
 
 	for_each_engine(engine, i915, tmp) {
 		clear_bit(I915_RESET_ENGINE + engine->id,
-			  &i915->gpu_error.flags);
+			  &error->flags);
 	}
 
-	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
-	wake_up_all(&i915->gpu_error.reset_queue);
+	clear_bit(I915_RESET_BACKOFF, &error->flags);
+	wake_up_all(&error->reset_queue);
 
 out:
 	intel_runtime_pm_put(i915, wakeref);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-02-06 12:06 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-06 12:04 [PATCH 1/7] drm/i915: Hack and slash, throttle execbuffer hogs Chris Wilson
2019-02-06 12:04 ` [PATCH 2/7] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
2019-02-06 12:04 ` [PATCH 3/7] drm/i915: Force the GPU reset upon wedging Chris Wilson
2019-02-06 12:04 ` [PATCH 4/7] drm/i915: Uninterruptibly drain the timelines on unwedging Chris Wilson
2019-02-06 12:04 ` [PATCH 5/7] drm/i915: Wait for old resets before applying debugfs/i915_wedged Chris Wilson
2019-02-06 12:04 ` Chris Wilson [this message]
2019-02-06 12:04 ` [PATCH 7/7] drm/i915: Don't claim an unstarted request was guilty Chris Wilson
2019-02-06 12:45 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/7] drm/i915: Hack and slash, throttle execbuffer hogs Patchwork
2019-02-06 13:28 ` ✓ Fi.CI.BAT: success " Patchwork
2019-02-06 15:02 ` ✓ Fi.CI.IGT: " Patchwork
2019-02-06 15:08   ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190206120432.9452-6-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.