All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 28/40] drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Date: Wed, 19 Sep 2018 20:55:32 +0100	[thread overview]
Message-ID: <20180919195544.1511-28-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20180919195544.1511-1-chris@chris-wilson.co.uk>

Make i915_gem_set_wedged() and i915_gem_unset_wedged() behaviour more
consistently if called concurrently.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8cdaeaea7a1..a2e7f7487588 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3324,10 +3324,15 @@ static void nop_complete_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("start\n");
+	mutex_lock(&error->wedge_mutex);
+	if (test_bit(I915_WEDGED, &error->flags)) {
+		mutex_unlock(&error->wedge_mutex);
+		return;
+	}
 
 	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
@@ -3336,8 +3341,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 	}
 
-	if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-		goto out;
+	GEM_TRACE("start\n");
 
 	/*
 	 * First, stop submission to hw, but do not yet complete requests by
@@ -3397,20 +3401,28 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
-out:
+	smp_mb__before_atomic();
+	set_bit(I915_WEDGED, &error->flags);
+
 	GEM_TRACE("end\n");
+	mutex_unlock(&error->wedge_mutex);
 
-	wake_up_all(&i915->gpu_error.reset_queue);
+	wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+	struct i915_gpu_error *error = &i915->gpu_error;
 	struct i915_timeline *tl;
+	bool ret = false;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
-	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+	if (!test_bit(I915_WEDGED, &error->flags))
 		return true;
 
+	mutex_lock(&error->wedge_mutex);
+
 	GEM_TRACE("start\n");
 
 	/*
@@ -3444,7 +3456,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		 */
 		if (dma_fence_default_wait(&rq->fence, true,
 					   MAX_SCHEDULE_TIMEOUT) < 0)
-			return false;
+			goto unlock;
 	}
 	i915_retire_requests(i915);
 	GEM_BUG_ON(i915->gt.active_requests);
@@ -3468,8 +3480,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+	ret = true;
+unlock:
+	mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-	return true;
+	return ret;
 }
 
 static void
@@ -5817,6 +5832,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+	mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 1c1bc0c23468..4f5c5be56002 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -269,8 +269,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_MODESET	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
 
 	/** Number of times an engine has been reset */
 	u32 reset_engine_count[I915_NUM_ENGINES];
@@ -281,6 +281,8 @@ struct i915_gpu_error {
 	/** Reason for the current *global* reset */
 	const char *reason;
 
+	struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 0eb283e7fc96..a4cd459c320d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);
 	if (!i915->wq)
-- 
2.19.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2018-09-19 19:56 UTC|newest]

Thread overview: 106+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-19 19:55 [PATCH 01/40] drm: Use default dma_fence hooks where possible for null syncobj Chris Wilson
2018-09-19 19:55 ` [PATCH 02/40] drm: Fix syncobj handing of schedule() returning 0 Chris Wilson
2018-09-20 14:13   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 03/40] drm/i915/selftests: Live tests emit requests and so require rpm Chris Wilson
2018-09-20 14:01   ` Tvrtko Ursulin
2018-09-20 14:47     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 04/40] drm/i915: Park the GPU on module load Chris Wilson
2018-09-20 14:02   ` Tvrtko Ursulin
2018-09-20 14:52     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 05/40] drm/i915: Handle incomplete Z_FINISH for compressed error states Chris Wilson
2018-09-19 19:55   ` Chris Wilson
2018-09-19 19:55 ` [PATCH 06/40] drm/i915: Clear the error PTE just once on finish Chris Wilson
2018-09-19 19:55 ` [PATCH 07/40] drm/i915: Cache the error string Chris Wilson
2018-09-19 19:55 ` [PATCH 08/40] drm/i915/execlists: Avoid kicking priority on the current context Chris Wilson
2018-09-19 19:55 ` [PATCH 09/40] drm/i915/selftests: Free the batch along the contexts error path Chris Wilson
2018-09-20  8:30   ` Mika Kuoppala
2018-09-20  8:36     ` Chris Wilson
2018-09-20  9:19       ` Mika Kuoppala
2018-09-19 19:55 ` [PATCH 10/40] drm/i915/selftests: Basic stress test for rapid context switching Chris Wilson
2018-09-20 10:38   ` Mika Kuoppala
2018-09-20 10:46     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 11/40] drm/i915/execlists: Onion unwind for logical_ring_init() failure Chris Wilson
2018-09-20 14:18   ` Mika Kuoppala
2018-09-20 14:21   ` Tvrtko Ursulin
2018-09-20 19:59     ` Chris Wilson
2018-09-21 10:00       ` Tvrtko Ursulin
2018-09-21 10:01         ` Chris Wilson
2018-09-19 19:55 ` [PATCH 12/40] drm/i915/execlists: Assert the queue is non-empty on unsubmitting Chris Wilson
2018-09-24  9:07   ` Tvrtko Ursulin
2018-09-25  7:41     ` Chris Wilson
2018-09-25  8:51       ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 13/40] drm/i915: Reserve some priority bits for internal use Chris Wilson
2018-09-24  9:12   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket Chris Wilson
2018-09-24 10:25   ` Tvrtko Ursulin
2018-09-25  7:55     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 15/40] drm/i915: Priority boost for new clients Chris Wilson
2018-09-24 10:29   ` Tvrtko Ursulin
2018-09-25  8:01     ` Chris Wilson
2018-09-25  8:26       ` Chris Wilson
2018-09-25  8:57         ` Tvrtko Ursulin
2018-09-25  9:06           ` Chris Wilson
2018-09-25  9:08             ` Tvrtko Ursulin
2018-09-25 11:20         ` Michal Wajdeczko
2018-09-19 19:55 ` [PATCH 16/40] drm/i915: Pull scheduling under standalone lock Chris Wilson
2018-09-24 11:19   ` Tvrtko Ursulin
2018-09-25  8:19     ` Chris Wilson
2018-09-25  9:01       ` Tvrtko Ursulin
2018-09-25  9:10         ` Chris Wilson
2018-09-25  9:19           ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 17/40] drm/i915: Priority boost for waiting clients Chris Wilson
2018-09-24 11:29   ` Tvrtko Ursulin
2018-09-25  9:00     ` Chris Wilson
2018-09-25  9:07       ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 18/40] drm/i915: Report the number of closed vma held by each context in debugfs Chris Wilson
2018-09-24 11:57   ` Tvrtko Ursulin
2018-09-25 12:20     ` Chris Wilson
2018-09-19 19:55 ` [PATCH 19/40] drm/i915: Remove debugfs/i915_ppgtt_info Chris Wilson
2018-09-24 12:03   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 20/40] drm/i915: Track all held rpm wakerefs Chris Wilson
2018-09-19 19:55 ` [PATCH 21/40] drm/i915: Markup paired operations on wakerefs Chris Wilson
2018-09-19 19:55 ` [PATCH 22/40] drm/i915: Syntatic sugar for using intel_runtime_pm Chris Wilson
2018-09-24 12:08   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 23/40] drm/i915: Markup paired operations on display power domains Chris Wilson
2018-09-19 19:55 ` [PATCH 24/40] drm/i915: Track the wakeref used to initialise " Chris Wilson
2018-09-19 19:55 ` [PATCH 25/40] drm/i915/dp: Markup pps lock power well Chris Wilson
2018-09-19 19:55 ` [PATCH 26/40] drm/i915: Complain if hsw_get_pipe_config acquires the same power well twice Chris Wilson
2018-09-19 19:55 ` [PATCH 27/40] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
2018-09-19 19:55 ` Chris Wilson [this message]
2018-09-19 19:55 ` [PATCH 29/40] drm/i915: Differentiate between ggtt->mutex and ppgtt->mutex Chris Wilson
2018-09-24 13:04   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 30/40] drm/i915: Pull all the reset functionality together into i915_reset.c Chris Wilson
2018-09-19 19:55 ` [PATCH 31/40] drm/i915: Make all GPU resets atomic Chris Wilson
2018-09-19 19:55 ` [PATCH 32/40] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2018-09-24 13:20   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 33/40] drm/i915: Extend CREATE_CONTEXT to allow inheritance ala clone() Chris Wilson
2018-09-24 17:22   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2018-09-25  8:45   ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 35/40] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2018-09-25  8:46   ` [Intel-gfx] " Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 36/40] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2018-09-19 19:55 ` [PATCH 37/40] drm/i915: Allow a context to define its set of engines Chris Wilson
2018-09-27 11:28   ` Tvrtko Ursulin
2018-09-28 20:22     ` Chris Wilson
2018-10-01  8:30       ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 38/40] drm/i915/execlists: Flush the CS events before unpinning Chris Wilson
2018-10-01 10:51   ` Tvrtko Ursulin
2018-10-01 11:06     ` Chris Wilson
2018-10-01 13:15       ` Tvrtko Ursulin
2018-10-01 13:26         ` Chris Wilson
2018-10-01 14:03           ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 39/40] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2018-09-27 11:32   ` Tvrtko Ursulin
2018-09-28 20:11     ` Chris Wilson
2018-10-01  8:14       ` Tvrtko Ursulin
2018-10-01  8:18         ` Chris Wilson
2018-10-01 10:18           ` Tvrtko Ursulin
2018-09-19 19:55 ` [PATCH 40/40] drm/i915: Load balancing across a virtual engine Chris Wilson
2018-10-01 11:37   ` Tvrtko Ursulin
2018-09-19 21:54 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/40] drm: Use default dma_fence hooks where possible for null syncobj Patchwork
2018-09-19 22:08 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-09-19 22:17 ` ✗ Fi.CI.BAT: failure " Patchwork
2018-09-20 13:34 ` [PATCH 01/40] " Tvrtko Ursulin
2018-09-20 13:40   ` Chris Wilson
2018-09-20 13:54     ` Tvrtko Ursulin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180919195544.1511-28-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.