[PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling
From: Chris Wilson @ 2019-04-25  9:19 UTC
To: intel-gfx

Currently there is an underlying assumption that i915_request_unsubmit()
is synchronous wrt the GPU -- that is, the request is no longer in flight
as we remove it. In the near future that may change, and this may upset
our signaling, as we may then process an interrupt for a request that is
no longer in flight.

CPU0					CPU1
intel_engine_breadcrumbs_irq
(queue request completion)
					i915_request_cancel_signaling
...					...
					i915_request_enable_signaling
dma_fence_signal

Hence, in the time it took us to drop the lock in order to signal the
request, a preemption event may have occurred and re-queued the request.
In the process, that request would have seen I915_FENCE_FLAG_SIGNAL clear
and so would have reused the rq->signal_link that was still in use on
CPU0, leading to bad pointer chasing in intel_engine_breadcrumbs_irq.
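
To make that window concrete, here is a rough sketch of the ordering the
current code uses, condensed from intel_engine_breadcrumbs_irq() and the
lines removed below (not verbatim):

	spin_lock(&b->irq_lock);
	/* for each completed request found on ce->signals */
	i915_request_get(rq);
	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
	list_add_tail(&rq->signal_link, &signal);
	spin_unlock(&b->irq_lock);

	/*
	 * Race window: a preemption event can unsubmit and resubmit the
	 * request here. i915_request_enable_breadcrumb() then sees
	 * I915_FENCE_FLAG_SIGNAL clear and re-links rq->signal_link onto
	 * ce->signals, while the local "signal" list above still threads
	 * through the same list node.
	 */

	dma_fence_signal(&rq->fence);
	i915_request_put(rq);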

A related issue was that if someone started listening for a signal on a
completed but no longer in-flight request, we missed the opportunity to
immediately signal that request.

Furthermore, as intel_contexts may be released immediately upon request
retirement, in order to be entirely sure that intel_engine_breadcrumbs_irq
can no longer be dereferencing the intel_context (ce->signals and
ce->signal_link), we must wait on the irq spinlock.
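
A minimal sketch of what the cancellation side then looks like,
paraphrased from the i915_request_cancel_breadcrumb() hunk below (the
unlink itself is folded into a hypothetical helper here):

	void i915_request_cancel_breadcrumb(struct i915_request *rq)
	{
		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;

		lockdep_assert_held(&rq->lock);
		lockdep_assert_irqs_disabled();

		/*
		 * Always take b->irq_lock, even if I915_FENCE_FLAG_SIGNAL
		 * is already clear: only returning after the lock has been
		 * acquired (and released) guarantees the interrupt handler
		 * has finished walking ce->signals for this request.
		 */
		spin_lock(&b->irq_lock);
		if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
			__cancel_breadcrumb(rq); /* hypothetical: unlink from ce->signals */
		spin_unlock(&b->irq_lock);
	}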

In order to prevent the race, we use a bit in the fence.flags to signal
the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
quickly signals to any outside observer that the fence is indeed signaled.
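
Roughly, the per-request claim in the interrupt handler then becomes
(condensed from the intel_engine_breadcrumbs_irq() loop in the patch
below):

	/* under b->irq_lock, for a completed request on ce->signals */
	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Claim the fence. If someone else (e.g. a direct
	 * dma_fence_signal() from retirement) has already set
	 * DMA_FENCE_FLAG_SIGNALED_BIT, leave rq->signal_link untouched
	 * and move on to the next request.
	 */
	if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		continue;

	i915_request_get(rq);
	list_add_tail(&rq->signal_link, &signal);

The fence callbacks and timestamp are then run from the local list after
dropping b->irq_lock, by which point the SIGNALED bit already prevents
the request from being re-queued onto ce->signals.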

Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/dma-buf/dma-fence.c                 |  1 +
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 58 +++++++++++++--------
 drivers/gpu/drm/i915/i915_request.c         |  1 +
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 3aa8733f832a..9bf06042619a 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -29,6 +29,7 @@
 
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
 EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
+EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled);
 
 static DEFINE_SPINLOCK(dma_fence_stub_lock);
 static struct dma_fence dma_fence_stub;
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 3cbffd400b1b..4283224249d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/kthread.h>
+#include <trace/events/dma_fence.h>
 #include <uapi/linux/sched/types.h>
 
 #include "i915_drv.h"
@@ -83,6 +84,7 @@ static inline bool __request_completed(const struct i915_request *rq)
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	const ktime_t timestamp = ktime_get();
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
 	LIST_HEAD(signal);
@@ -104,6 +106,11 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
 					     &rq->fence.flags));
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+			if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					     &rq->fence.flags))
+				continue;
 
 			/*
 			 * Queue for execution after dropping the signaling
@@ -111,14 +118,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 			 * more signalers to the same context or engine.
 			 */
 			i915_request_get(rq);
-
-			/*
-			 * We may race with direct invocation of
-			 * dma_fence_signal(), e.g. i915_request_retire(),
-			 * so we need to acquire our reference to the request
-			 * before we cancel the breadcrumb.
-			 */
-			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 			list_add_tail(&rq->signal_link, &signal);
 		}
 
@@ -140,8 +139,21 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 	list_for_each_safe(pos, next, &signal) {
 		struct i915_request *rq =
 			list_entry(pos, typeof(*rq), signal_link);
+		struct dma_fence_cb *cur, *tmp;
+
+		trace_dma_fence_signaled(&rq->fence);
+
+		rq->fence.timestamp = timestamp;
+		set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &rq->fence.flags);
+
+		spin_lock(&rq->lock);
+		list_for_each_entry_safe(cur, tmp, &rq->fence.cb_list, node) {
+			INIT_LIST_HEAD(&cur->node);
+			cur->func(&rq->fence, cur);
+		}
+		INIT_LIST_HEAD(&rq->fence.cb_list);
+		spin_unlock(&rq->lock);
 
-		dma_fence_signal(&rq->fence);
 		i915_request_put(rq);
 	}
 }
@@ -243,19 +255,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
-
-	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
-
-	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-		return true;
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_irqs_disabled();
 
-	spin_lock(&b->irq_lock);
-	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
-	    !__request_completed(rq)) {
+	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 		struct intel_context *ce = rq->hw_context;
 		struct list_head *pos;
 
+		spin_lock(&b->irq_lock);
+		GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+
 		__intel_breadcrumbs_arm_irq(b);
 
 		/*
@@ -284,8 +294,8 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 			list_move_tail(&ce->signal_link, &b->signalers);
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+		spin_unlock(&b->irq_lock);
 	}
-	spin_unlock(&b->irq_lock);
 
 	return !__request_completed(rq);
 }
@@ -294,9 +304,15 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
 	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
-		return;
+	lockdep_assert_held(&rq->lock);
+	lockdep_assert_irqs_disabled();
 
+	/*
+	 * We must wait for b->irq_lock so that we know the interrupt handler
+	 * has released its reference to the intel_context and has completed
+	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
+	 * required).
+	 */
 	spin_lock(&b->irq_lock);
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
 		struct intel_context *ce = rq->hw_context;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5869c37a35e1..fa9037173f1d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -417,6 +417,7 @@ void __i915_request_submit(struct i915_request *request)
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
-- 
2.20.1

Thread overview: 74+ messages
2019-04-25  9:19 [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Chris Wilson
2019-04-25  9:19 ` [PATCH 02/45] drm/i915/gvt: Pin the per-engine GVT shadow contexts Chris Wilson
2019-04-25  9:19 ` [PATCH 03/45] drm/i915: Export intel_context_instance() Chris Wilson
2019-04-25  9:19 ` [PATCH 04/45] drm/i915/selftests: Use the real kernel context for sseu isolation tests Chris Wilson
2019-04-25  9:19 ` [PATCH 05/45] drm/i915/selftests: Pass around intel_context for sseu Chris Wilson
2019-04-25  9:19 ` [PATCH 06/45] drm/i915: Pass intel_context to intel_context_pin_lock() Chris Wilson
2019-04-25  9:19 ` [PATCH 07/45] drm/i915: Split engine setup/init into two phases Chris Wilson
2019-04-25  9:19 ` [PATCH 08/45] drm/i915: Switch back to an array of logical per-engine HW contexts Chris Wilson
2019-04-25  9:19 ` [PATCH 09/45] drm/i915: Remove intel_context.active_link Chris Wilson
2019-04-25  9:19 ` [PATCH 10/45] drm/i915: Move i915_request_alloc into selftests/ Chris Wilson
2019-04-25  9:19 ` [PATCH 11/45] drm/i915/execlists: Flush the tasklet on parking Chris Wilson
2019-04-25  9:19 ` [PATCH 12/45] drm/i915: Move the engine->destroy() vfunc onto the engine Chris Wilson
2019-04-25 11:01   ` Tvrtko Ursulin
2019-04-25 11:24     ` Chris Wilson
2019-04-25 11:29     ` [PATCH v2] " Chris Wilson
2019-04-25 11:46       ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 13/45] drm/i915: Convert inconsistent static engine tables into an init error Chris Wilson
2019-04-25  9:19 ` [PATCH 14/45] drm/i915: Make engine_mask & num_engines static Chris Wilson
2019-04-25  9:30   ` Jani Nikula
2019-04-25 10:20   ` Tvrtko Ursulin
2019-04-25 10:30     ` Chris Wilson
2019-04-25 10:43       ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 15/45] drm/i915: Restore control over ppgtt for context creation ABI Chris Wilson
2019-04-25  9:19 ` [PATCH 16/45] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-04-25  9:19 ` [PATCH 17/45] drm/i915: Re-expose SINGLE_TIMELINE flags for context creation Chris Wilson
2019-04-25  9:19 ` [PATCH 18/45] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
2019-04-25  9:19 ` [PATCH 19/45] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-04-25 12:14   ` Tvrtko Ursulin
2019-04-25 12:23     ` Chris Wilson
2019-04-29 13:43   ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 20/45] drm/i915: Apply an execution_mask to the virtual_engine Chris Wilson
2019-04-29 14:12   ` Tvrtko Ursulin
2019-05-07 16:59     ` Chris Wilson
2019-05-08  8:48       ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 21/45] drm/i915: Extend execution fence to support a callback Chris Wilson
2019-04-25  9:19 ` [PATCH 22/45] drm/i915/execlists: Virtual engine bonding Chris Wilson
2019-04-29 15:58   ` Tvrtko Ursulin
2019-04-25  9:19 ` [PATCH 23/45] drm/i915: Allow specification of parallel execbuf Chris Wilson
2019-04-25  9:19 ` [PATCH 24/45] drm/i915: Split GEM object type definition to its own header Chris Wilson
2019-04-26 12:12   ` Jani Nikula
2019-04-29  9:36     ` Joonas Lahtinen
2019-04-29 17:52       ` Rodrigo Vivi
2019-04-25  9:19 ` [PATCH 25/45] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
2019-04-25  9:19 ` [PATCH 26/45] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
2019-04-25  9:19 ` [PATCH 27/45] drm/i915: Move shmem object setup to its own file Chris Wilson
2019-04-25  9:19 ` [PATCH 28/45] drm/i915: Move phys objects " Chris Wilson
2019-04-25  9:19 ` [PATCH 29/45] drm/i915: Move mmap and friends " Chris Wilson
2019-04-25  9:19 ` [PATCH 30/45] drm/i915: Move GEM domain management " Chris Wilson
2019-04-25  9:19 ` [PATCH 31/45] drm/i915: Move more GEM objects under gem/ Chris Wilson
2019-04-25  9:19 ` [PATCH 32/45] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
2019-04-25  9:19 ` [PATCH 33/45] lockdep: Swap storage for pin_count and refereneces Chris Wilson
2019-04-25  9:19 ` [PATCH 34/45] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
2019-04-25  9:19 ` [PATCH 35/45] drm/i915: Move GEM object waiting to its own file Chris Wilson
2019-04-25  9:19 ` [PATCH 36/45] drm/i915: Move GEM object busy checking " Chris Wilson
2019-04-25  9:19 ` [PATCH 37/45] drm/i915: Move GEM client throttling " Chris Wilson
2019-04-25  9:19 ` [PATCH 38/45] drm/i915: Drop the deferred active reference Chris Wilson
2019-05-01 10:34   ` Matthew Auld
2019-04-25  9:19 ` [PATCH 39/45] drm/i915: Move object close under its own lock Chris Wilson
2019-04-25  9:19 ` [PATCH 40/45] drm/i915: Rename intel_context.active to .inflight Chris Wilson
2019-04-25  9:20 ` [PATCH 41/45] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
2019-04-25  9:20 ` [PATCH 42/45] drm/i915: Stop retiring along engine Chris Wilson
2019-04-25  9:20 ` [PATCH 43/45] drm/i915: Replace engine->timeline with a plain list Chris Wilson
2019-04-25  9:20 ` [PATCH 44/45] drm/i915/execlists: Preempt-to-busy Chris Wilson
2019-04-25  9:20 ` [PATCH 45/45] drm/i915/execlists: Minimalistic timeslicing Chris Wilson
2019-04-25 10:35 ` [PATCH 01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling Tvrtko Ursulin
2019-04-25 10:42   ` Chris Wilson
2019-04-26 10:40     ` Tvrtko Ursulin
2019-04-25 11:22 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] " Patchwork
2019-04-25 11:39 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-04-25 12:07 ` ✓ Fi.CI.BAT: success " Patchwork
2019-04-25 12:09 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/45] drm/i915: Seal races between async GPU cancellation, retirement and signaling (rev2) Patchwork
2019-04-25 12:25 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-04-25 12:56 ` ✓ Fi.CI.BAT: success " Patchwork
2019-04-25 21:52 ` ✗ Fi.CI.IGT: failure " Patchwork
