All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
@ 2018-01-22 15:41 Chris Wilson
  2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
                   ` (7 more replies)
  0 siblings, 8 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-22 15:41 UTC (permalink / raw)
  To: intel-gfx

If we remember to cancel the signaler on a request when retiring it
(after we know that the request has been signaled), we do not need to
carry an additional request in the signaler itself. This prevents an
issue whereby the signaler threads may be delayed and hold on to
thousands of request references, causing severe memory fragmentation and
premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
and frequent use of inter-engine fences).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
 2 files changed, 85 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0f451b4a4e8..e4eca6ba0d05 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	spin_lock_irq(&request->lock);
 	if (request->waitboost)
 		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
-	dma_fence_signal_locked(&request->fence);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
+		dma_fence_signal_locked(&request->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
 	spin_unlock_irq(&request->lock);
 
 	i915_priotree_fini(request->i915, &request->priotree);
@@ -738,6 +741,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* No zalloc, must clear what we need by hand */
 	req->global_seqno = 0;
+	req->signaling.wait.seqno = 0;
 	req->file_priv = NULL;
 	req->batch = NULL;
 	req->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 86acac010bb8..e3667dc1e96d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	struct intel_wait *wait, *n;
 
 	if (!b->irq_armed)
-		goto wakeup_signaler;
+		return;
 
 	/*
 	 * We only disarm the irq when we are idle (all requests completed),
@@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	b->waiters = RB_ROOT;
 
 	spin_unlock_irq(&b->rb_lock);
-
-	/*
-	 * The signaling thread may be asleep holding a reference to a request,
-	 * that had its signaling cancelled prior to being preempted. We need
-	 * to kick the signaler, just in case, to release any such reference.
-	 */
-wakeup_signaler:
-	wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
 	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
+static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
+					 struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * Wake up all other completed waiters and select the
+	 * next bottom-half for the next user interrupt.
+	 */
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	/*
+	 * Find the next oldest signal. Note that as we have
+	 * not been holding the lock, another client may
+	 * have installed an even older signal than the one
+	 * we just completed - so double check we are still
+	 * the oldest before picking the next one.
+	 */
+	if (request->signaling.wait.seqno) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb = rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+
+		rb_erase(&request->signaling.node, &b->signals);
+		request->signaling.wait.seqno = 0;
+	}
+}
+
+static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
+{
+	/*
+	 * See the big warnings for i915_gem_active_get_rcu() and similarly
+	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
+	 * here with defeating CPU/compiler speculation and enforcing
+	 * the required memory barriers.
+	 */
+	do {
+		struct drm_i915_gem_request *request;
+
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+
+		barrier();
+
+		if (!request || request == rcu_access_pointer(b->first_signal))
+			return rcu_pointer_handoff(request);
+
+		i915_gem_request_put(request);
+	} while (1);
+}
+
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
@@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * a new client.
 		 */
 		rcu_read_lock();
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_gem_request_get_rcu(request);
+		request = first_signal(b);
 		rcu_read_unlock();
 		if (signal_complete(request)) {
-			local_bh_disable();
-			dma_fence_signal(&request->fence);
-			local_bh_enable(); /* kick start the tasklets */
-
-			spin_lock_irq(&b->rb_lock);
-
-			/* Wake up all other completed waiters and select the
-			 * next bottom-half for the next user interrupt.
-			 */
-			__intel_engine_remove_wait(engine,
-						   &request->signaling.wait);
-
-			/* Find the next oldest signal. Note that as we have
-			 * not been holding the lock, another client may
-			 * have installed an even older signal than the one
-			 * we just completed - so double check we are still
-			 * the oldest before picking the next one.
-			 */
-			if (request == rcu_access_pointer(b->first_signal)) {
-				struct rb_node *rb =
-					rb_next(&request->signaling.node);
-				rcu_assign_pointer(b->first_signal,
-						   rb ? to_signaler(rb) : NULL);
+			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				      &request->fence.flags)) {
+				local_bh_disable();
+				dma_fence_signal(&request->fence);
+				local_bh_enable(); /* kick start the tasklets */
 			}
-			rb_erase(&request->signaling.node, &b->signals);
-			RB_CLEAR_NODE(&request->signaling.node);
-
-			spin_unlock_irq(&b->rb_lock);
 
-			i915_gem_request_put(request);
+			if (request->signaling.wait.seqno) {
+				spin_lock_irq(&b->rb_lock);
+				__intel_engine_remove_signal(engine, request);
+				spin_unlock_irq(&b->rb_lock);
+			}
 
 			/* If the engine is saturated we may be continually
 			 * processing completed requests. This angers the
@@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			do_schedule = need_resched();
 		}
+		i915_gem_request_put(request);
 
 		if (unlikely(do_schedule)) {
 			if (kthread_should_park())
 				kthread_parkme();
 
-			if (unlikely(kthread_should_stop())) {
-				i915_gem_request_put(request);
+			if (unlikely(kthread_should_stop()))
 				break;
-			}
 
 			schedule();
 		}
-		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	if (!seqno)
 		return;
 
+	spin_lock(&b->rb_lock);
+
+	GEM_BUG_ON(request->signaling.wait.seqno);
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
-	i915_gem_request_get(request);
-
-	spin_lock(&b->rb_lock);
 
 	/* First add ourselves into the list of waiters, but register our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
@@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 			rcu_assign_pointer(b->first_signal, request);
 	} else {
 		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		i915_gem_request_put(request);
+		request->signaling.wait.seqno = 0;
 		wakeup = false;
 	}
 
@@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
-	GEM_BUG_ON(!request->signaling.wait.seqno);
 
-	spin_lock(&b->rb_lock);
+	if (request->signaling.wait.seqno) {
+		struct intel_engine_cs *engine = request->engine;
+		struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	if (!RB_EMPTY_NODE(&request->signaling.node)) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb =
-				rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-		rb_erase(&request->signaling.node, &b->signals);
-		RB_CLEAR_NODE(&request->signaling.node);
-		i915_gem_request_put(request);
+		spin_lock(&b->rb_lock);
+		__intel_engine_remove_signal(engine, request);
+		spin_unlock(&b->rb_lock);
 	}
-
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	spin_unlock(&b->rb_lock);
-
-	request->signaling.wait.seqno = 0;
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* ✗ Fi.CI.BAT: warning for drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
@ 2018-01-22 16:07 ` Patchwork
  2018-01-22 19:01 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-01-22 16:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread
URL   : https://patchwork.freedesktop.org/series/36908/
State : warning

== Summary ==

Series 36908v1 drm/i915/breadcrumbs: Drop request reference for the signaler thread
https://patchwork.freedesktop.org/api/1.0/series/36908/revisions/1/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                fail       -> DMESG-WARN (fi-elk-e7500) fdo#103989 +1
Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                pass       -> DMESG-WARN (fi-kbl-7500u)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:428s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:371s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:488s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:281s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:482s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:483s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:466s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:456s
fi-elk-e7500     total:224  pass:168  dwarn:10  dfail:0   fail:0   skip:45 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:279s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:513s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:398s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:399s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:412s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:460s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:416s
fi-kbl-7500u     total:288  pass:262  dwarn:2   dfail:0   fail:0   skip:24  time:459s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:497s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:453s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:504s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:572s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:431s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:510s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:529s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:486s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:492s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:416s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:436s
fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:399s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:568s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:470s
fi-bdw-gvtdvm failed to collect. IGT log at Patchwork_7740/fi-bdw-gvtdvm/igt.log

0edb3ea337349387863e60ed0bdf53b9b2767cf4 drm-tip: 2018y-01m-22d-14h-27m-52s UTC integration manifest
c6f6fdb0d9c0 drm/i915/breadcrumbs: Drop request reference for the signaler thread

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7740/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
  2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
@ 2018-01-22 19:01 ` Patchwork
  2018-01-23  0:03 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-01-22 19:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread
URL   : https://patchwork.freedesktop.org/series/36908/
State : success

== Summary ==

Series 36908v1 drm/i915/breadcrumbs: Drop request reference for the signaler thread
https://patchwork.freedesktop.org/api/1.0/series/36908/revisions/1/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                dmesg-fail -> DMESG-WARN (fi-elk-e7500) fdo#103989
Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                fail       -> PASS       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:420s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:427s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:374s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:489s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:280s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:482s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:482s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:468s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:455s
fi-elk-e7500     total:224  pass:168  dwarn:10  dfail:0   fail:0   skip:45 
fi-gdg-551       total:288  pass:180  dwarn:0   dfail:0   fail:0   skip:108 time:280s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:517s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:392s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:399s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:412s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:466s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:416s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:460s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:494s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:454s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:501s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:585s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:434s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:515s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:530s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:488s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:491s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:422s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:430s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:516s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:398s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:569s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:472s

06c8efda323ac918fad0e26d81e8884574ec8b84 drm-tip: 2018y-01m-22d-17h-43m-26s UTC integration manifest
a40183e96a23 drm/i915/breadcrumbs: Drop request reference for the signaler thread

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7743/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
  2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
  2018-01-22 19:01 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-01-23  0:03 ` Patchwork
  2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-01-23  0:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread
URL   : https://patchwork.freedesktop.org/series/36908/
State : failure

== Summary ==

Test kms_cursor_crc:
        Subgroup cursor-256x256-suspend:
                incomplete -> PASS       (shard-hsw) fdo#103375 +1
Test kms_flip:
        Subgroup 2x-flip-vs-expired-vblank:
                fail       -> PASS       (shard-hsw)
        Subgroup vblank-vs-suspend:
                pass       -> INCOMPLETE (shard-hsw) fdo#100368
Test perf:
        Subgroup polling:
                fail       -> PASS       (shard-hsw) fdo#102252
Test gem_eio:
        Subgroup in-flight-external:
                fail       -> PASS       (shard-hsw) fdo#104676
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-primscrn-cur-indfb-move:
                pass       -> SKIP       (shard-snb) fdo#101623 +1
Test perf_pmu:
        Subgroup multi-client-vcs0:
                pass       -> FAIL       (shard-apl)

fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#104676 https://bugs.freedesktop.org/show_bug.cgi?id=104676
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623

shard-apl        total:2753 pass:1713 dwarn:1   dfail:0   fail:26  skip:1013 time:13941s
shard-hsw        total:2721 pass:1706 dwarn:1   dfail:0   fail:10  skip:1002 time:14634s
shard-snb        total:2753 pass:1317 dwarn:1   dfail:0   fail:10  skip:1425 time:7887s
Blacklisted hosts:
shard-kbl        total:2753 pass:1812 dwarn:24  dfail:2   fail:25  skip:890 time:10845s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7740/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (2 preceding siblings ...)
  2018-01-23  0:03 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-01-24 13:09 ` Tvrtko Ursulin
  2018-01-24 14:44   ` Chris Wilson
  2018-01-31 10:02 ` Chris Wilson
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 13:09 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/01/2018 15:41, Chris Wilson wrote:
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).

What is starving the signaler thread, which is set to SCHED_FIFO, and 
can't be tasklets on SNB?
Before I actually start revieweing the code, which I'd rather avoid :) :

Is it just not able to process enough requests in it's time-slice 
(need_resched) so is falling behind? It would be surprising since I 
would expect it to be much lighter wait processing there, per request, 
than on the submission paths.

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
>   2 files changed, 85 insertions(+), 70 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..e4eca6ba0d05 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   	spin_lock_irq(&request->lock);
>   	if (request->waitboost)
>   		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
> -	dma_fence_signal_locked(&request->fence);
> +	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
> +		dma_fence_signal_locked(&request->fence);
> +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> +		intel_engine_cancel_signaling(request);
>   	spin_unlock_irq(&request->lock);
>   
>   	i915_priotree_fini(request->i915, &request->priotree);
> @@ -738,6 +741,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   
>   	/* No zalloc, must clear what we need by hand */
>   	req->global_seqno = 0;
> +	req->signaling.wait.seqno = 0;
>   	req->file_priv = NULL;
>   	req->batch = NULL;
>   	req->capture_list = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 86acac010bb8..e3667dc1e96d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>   	struct intel_wait *wait, *n;
>   
>   	if (!b->irq_armed)
> -		goto wakeup_signaler;
> +		return;
>   
>   	/*
>   	 * We only disarm the irq when we are idle (all requests completed),
> @@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>   	b->waiters = RB_ROOT;
>   
>   	spin_unlock_irq(&b->rb_lock);
> -
> -	/*
> -	 * The signaling thread may be asleep holding a reference to a request,
> -	 * that had its signaling cancelled prior to being preempted. We need
> -	 * to kick the signaler, just in case, to release any such reference.
> -	 */
> -wakeup_signaler:
> -	wake_up_process(b->signaler);
>   }
>   
>   static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
>   	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
>   }
>   
> +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> +					 struct drm_i915_gem_request *request)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	lockdep_assert_held(&b->rb_lock);
> +
> +	/*
> +	 * Wake up all other completed waiters and select the
> +	 * next bottom-half for the next user interrupt.
> +	 */
> +	__intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> +	/*
> +	 * Find the next oldest signal. Note that as we have
> +	 * not been holding the lock, another client may
> +	 * have installed an even older signal than the one
> +	 * we just completed - so double check we are still
> +	 * the oldest before picking the next one.
> +	 */
> +	if (request->signaling.wait.seqno) {
> +		if (request == rcu_access_pointer(b->first_signal)) {
> +			struct rb_node *rb = rb_next(&request->signaling.node);
> +			rcu_assign_pointer(b->first_signal,
> +					   rb ? to_signaler(rb) : NULL);
> +		}
> +
> +		rb_erase(&request->signaling.node, &b->signals);
> +		request->signaling.wait.seqno = 0;
> +	}
> +}
> +
> +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> +{
> +	/*
> +	 * See the big warnings for i915_gem_active_get_rcu() and similarly
> +	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
> +	 * here with defeating CPU/compiler speculation and enforcing
> +	 * the required memory barriers.
> +	 */
> +	do {
> +		struct drm_i915_gem_request *request;
> +
> +		request = rcu_dereference(b->first_signal);
> +		if (request)
> +			request = i915_gem_request_get_rcu(request);
> +
> +		barrier();
> +
> +		if (!request || request == rcu_access_pointer(b->first_signal))
> +			return rcu_pointer_handoff(request);
> +
> +		i915_gem_request_put(request);
> +	} while (1);
> +}
> +
>   static int intel_breadcrumbs_signaler(void *arg)
>   {
>   	struct intel_engine_cs *engine = arg;
> @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>   		 * a new client.
>   		 */
>   		rcu_read_lock();
> -		request = rcu_dereference(b->first_signal);
> -		if (request)
> -			request = i915_gem_request_get_rcu(request);
> +		request = first_signal(b);
>   		rcu_read_unlock();
>   		if (signal_complete(request)) {
> -			local_bh_disable();
> -			dma_fence_signal(&request->fence);
> -			local_bh_enable(); /* kick start the tasklets */
> -
> -			spin_lock_irq(&b->rb_lock);
> -
> -			/* Wake up all other completed waiters and select the
> -			 * next bottom-half for the next user interrupt.
> -			 */
> -			__intel_engine_remove_wait(engine,
> -						   &request->signaling.wait);
> -
> -			/* Find the next oldest signal. Note that as we have
> -			 * not been holding the lock, another client may
> -			 * have installed an even older signal than the one
> -			 * we just completed - so double check we are still
> -			 * the oldest before picking the next one.
> -			 */
> -			if (request == rcu_access_pointer(b->first_signal)) {
> -				struct rb_node *rb =
> -					rb_next(&request->signaling.node);
> -				rcu_assign_pointer(b->first_signal,
> -						   rb ? to_signaler(rb) : NULL);
> +			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +				      &request->fence.flags)) {
> +				local_bh_disable();
> +				dma_fence_signal(&request->fence);
> +				local_bh_enable(); /* kick start the tasklets */
>   			}
> -			rb_erase(&request->signaling.node, &b->signals);
> -			RB_CLEAR_NODE(&request->signaling.node);
> -
> -			spin_unlock_irq(&b->rb_lock);
>   
> -			i915_gem_request_put(request);
> +			if (request->signaling.wait.seqno) {
> +				spin_lock_irq(&b->rb_lock);
> +				__intel_engine_remove_signal(engine, request);
> +				spin_unlock_irq(&b->rb_lock);
> +			}
>   
>   			/* If the engine is saturated we may be continually
>   			 * processing completed requests. This angers the
> @@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
>   			 */
>   			do_schedule = need_resched();
>   		}
> +		i915_gem_request_put(request);
>   
>   		if (unlikely(do_schedule)) {
>   			if (kthread_should_park())
>   				kthread_parkme();
>   
> -			if (unlikely(kthread_should_stop())) {
> -				i915_gem_request_put(request);
> +			if (unlikely(kthread_should_stop()))
>   				break;
> -			}
>   
>   			schedule();
>   		}
> -		i915_gem_request_put(request);
>   	} while (1);
>   	__set_current_state(TASK_RUNNING);
>   
> @@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   	if (!seqno)
>   		return;
>   
> +	spin_lock(&b->rb_lock);
> +
> +	GEM_BUG_ON(request->signaling.wait.seqno);
>   	request->signaling.wait.tsk = b->signaler;
>   	request->signaling.wait.request = request;
>   	request->signaling.wait.seqno = seqno;
> -	i915_gem_request_get(request);
> -
> -	spin_lock(&b->rb_lock);
>   
>   	/* First add ourselves into the list of waiters, but register our
>   	 * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   			rcu_assign_pointer(b->first_signal, request);
>   	} else {
>   		__intel_engine_remove_wait(engine, &request->signaling.wait);
> -		i915_gem_request_put(request);
> +		request->signaling.wait.seqno = 0;
>   		wakeup = false;
>   	}
>   
> @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   
>   void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
>   {
> -	struct intel_engine_cs *engine = request->engine;
> -	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
>   	GEM_BUG_ON(!irqs_disabled());
>   	lockdep_assert_held(&request->lock);
> -	GEM_BUG_ON(!request->signaling.wait.seqno);
>   
> -	spin_lock(&b->rb_lock);
> +	if (request->signaling.wait.seqno) {
> +		struct intel_engine_cs *engine = request->engine;
> +		struct intel_breadcrumbs *b = &engine->breadcrumbs;
>   
> -	if (!RB_EMPTY_NODE(&request->signaling.node)) {
> -		if (request == rcu_access_pointer(b->first_signal)) {
> -			struct rb_node *rb =
> -				rb_next(&request->signaling.node);
> -			rcu_assign_pointer(b->first_signal,
> -					   rb ? to_signaler(rb) : NULL);
> -		}
> -		rb_erase(&request->signaling.node, &b->signals);
> -		RB_CLEAR_NODE(&request->signaling.node);
> -		i915_gem_request_put(request);
> +		spin_lock(&b->rb_lock);
> +		__intel_engine_remove_signal(engine, request);
> +		spin_unlock(&b->rb_lock);
>   	}
> -
> -	__intel_engine_remove_wait(engine, &request->signaling.wait);
> -
> -	spin_unlock(&b->rb_lock);
> -
> -	request->signaling.wait.seqno = 0;
>   }
>   
>   int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
@ 2018-01-24 14:44   ` Chris Wilson
  2018-01-26  9:42     ` Chris Wilson
  0 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-01-24 14:44 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-24 13:09:37)
> 
> On 22/01/2018 15:41, Chris Wilson wrote:
> > If we remember to cancel the signaler on a request when retiring it
> > (after we know that the request has been signaled), we do not need to
> > carry an additional request in the signaler itself. This prevents an
> > issue whereby the signaler threads may be delayed and hold on to
> > thousands of request references, causing severe memory fragmentation and
> > premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> > and frequent use of inter-engine fences).
> 
> What is starving the signaler thread, which is set to SCHED_FIFO, and 
> can't be tasklets on SNB?

Interrupts. MI_USER_INTERRUPT to be precise, but we have to check all
the other sources on snb as well.

> Before I actually start revieweing the code, which I'd rather avoid :) :
> 
> Is it just not able to process enough requests in it's time-slice 
> (need_resched) so is falling behind? It would be surprising since I 
> would expect it to be much lighter wait processing there, per request, 
> than on the submission paths.

The conclusion is a bit odd, but more or less it's just a pathological
case where interrupts + rt task are contending for one cpu with
submission proceeding on another. Making the signaler lighter was the
intention of the rest of the series, but this patch by itself prevents
the runaway references.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-24 14:44   ` Chris Wilson
@ 2018-01-26  9:42     ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-26  9:42 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-01-24 14:44:01)
> Quoting Tvrtko Ursulin (2018-01-24 13:09:37)
> > 
> > On 22/01/2018 15:41, Chris Wilson wrote:
> > > If we remember to cancel the signaler on a request when retiring it
> > > (after we know that the request has been signaled), we do not need to
> > > carry an additional request in the signaler itself. This prevents an
> > > issue whereby the signaler threads may be delayed and hold on to
> > > thousands of request references, causing severe memory fragmentation and
> > > premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> > > and frequent use of inter-engine fences).
> > 
> > What is starving the signaler thread, which is set to SCHED_FIFO, and 
> > can't be tasklets on SNB?
> 
> Interrupts. MI_USER_INTERRUPT to be precise, but we have to check all
> the other sources on snb as well.
> 
> > Before I actually start revieweing the code, which I'd rather avoid :) :
> > 
> > Is it just not able to process enough requests in it's time-slice 
> > (need_resched) so is falling behind? It would be surprising since I 
> > would expect it to be much lighter wait processing there, per request, 
> > than on the submission paths.
> 
> The conclusion is a bit odd, but more or less it's just a pathological
> case where interrupts + rt task are contending for one cpu with
> submission proceeding on another. Making the signaler lighter was the
> intention of the rest of the series, but this patch by itself prevents
> the runaway references.

Whilst I'm thinking of this, when I hit oom on snb, there were ~3
million requests allocated (/proc/slabinfo) but only ~3 in-flight.
Tracing the request references gave the clue that the only outstanding
ones were in the signaler (there were only 2 sources of references, one
for the active request and one for the signaler; and we accounted for the
active request knowing that they were being retired).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (3 preceding siblings ...)
  2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
@ 2018-01-31 10:02 ` Chris Wilson
  2018-01-31 17:18 ` Tvrtko Ursulin
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 10:02 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2018-01-22 15:41:06)
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Poke. I've been staring at this problem since December, it would be nice
to get some resolution :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (4 preceding siblings ...)
  2018-01-31 10:02 ` Chris Wilson
@ 2018-01-31 17:18 ` Tvrtko Ursulin
  2018-01-31 17:30   ` Chris Wilson
  2018-02-03 10:19   ` [PATCH v2] " Chris Wilson
  2018-02-03 10:40 ` ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2) Patchwork
  2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
  7 siblings, 2 replies; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-01-31 17:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 22/01/2018 15:41, Chris Wilson wrote:
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
>   2 files changed, 85 insertions(+), 70 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..e4eca6ba0d05 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   	spin_lock_irq(&request->lock);
>   	if (request->waitboost)
>   		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
> -	dma_fence_signal_locked(&request->fence);
> +	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
> +		dma_fence_signal_locked(&request->fence);
> +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> +		intel_engine_cancel_signaling(request);
>   	spin_unlock_irq(&request->lock);
>   
>   	i915_priotree_fini(request->i915, &request->priotree);
> @@ -738,6 +741,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   
>   	/* No zalloc, must clear what we need by hand */
>   	req->global_seqno = 0;
> +	req->signaling.wait.seqno = 0;
>   	req->file_priv = NULL;
>   	req->batch = NULL;
>   	req->capture_list = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 86acac010bb8..e3667dc1e96d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>   	struct intel_wait *wait, *n;
>   
>   	if (!b->irq_armed)
> -		goto wakeup_signaler;
> +		return;
>   
>   	/*
>   	 * We only disarm the irq when we are idle (all requests completed),
> @@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>   	b->waiters = RB_ROOT;
>   
>   	spin_unlock_irq(&b->rb_lock);
> -
> -	/*
> -	 * The signaling thread may be asleep holding a reference to a request,
> -	 * that had its signaling cancelled prior to being preempted. We need
> -	 * to kick the signaler, just in case, to release any such reference.
> -	 */
> -wakeup_signaler:
> -	wake_up_process(b->signaler);
>   }
>   
>   static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
>   	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
>   }
>   
> +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> +					 struct drm_i915_gem_request *request)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	lockdep_assert_held(&b->rb_lock);
> +
> +	/*
> +	 * Wake up all other completed waiters and select the
> +	 * next bottom-half for the next user interrupt.
> +	 */
> +	__intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> +	/*
> +	 * Find the next oldest signal. Note that as we have
> +	 * not been holding the lock, another client may

Which lock, request-lock? b->first_signal seems to always be updated 
under the b->rb_lock, so I am not sure of the 
request->signaling.wait.seqno check.

wait.seqno is also updated without the request->lock below, so maybe you 
were talking about the rb_lock after all?

How does wait.seqno becomes 0 outside the rb_lock? Is it due RCU 
business this patch adds? Like it is retired before the signaler runs?

> +	 * have installed an even older signal than the one
> +	 * we just completed - so double check we are still
> +	 * the oldest before picking the next one.
> +	 */
> +	if (request->signaling.wait.seqno) {
> +		if (request == rcu_access_pointer(b->first_signal)) {
> +			struct rb_node *rb = rb_next(&request->signaling.node);
> +			rcu_assign_pointer(b->first_signal,
> +					   rb ? to_signaler(rb) : NULL);
> +		}
> +
> +		rb_erase(&request->signaling.node, &b->signals);
> +		request->signaling.wait.seqno = 0;
> +	}
> +}
> +
> +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> +{
> +	/*
> +	 * See the big warnings for i915_gem_active_get_rcu() and similarly
> +	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
> +	 * here with defeating CPU/compiler speculation and enforcing
> +	 * the required memory barriers.
> +	 */
> +	do {
> +		struct drm_i915_gem_request *request;
> +
> +		request = rcu_dereference(b->first_signal);
> +		if (request)
> +			request = i915_gem_request_get_rcu(request);
> +
> +		barrier();
> +
> +		if (!request || request == rcu_access_pointer(b->first_signal))
> +			return rcu_pointer_handoff(request);
> +
> +		i915_gem_request_put(request);
> +	} while (1);
> +}
> +
>   static int intel_breadcrumbs_signaler(void *arg)
>   {
>   	struct intel_engine_cs *engine = arg;
> @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>   		 * a new client.
>   		 */
>   		rcu_read_lock();
> -		request = rcu_dereference(b->first_signal);
> -		if (request)
> -			request = i915_gem_request_get_rcu(request);
> +		request = first_signal(b);

get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.

>   		rcu_read_unlock();
>   		if (signal_complete(request)) {
> -			local_bh_disable();
> -			dma_fence_signal(&request->fence);
> -			local_bh_enable(); /* kick start the tasklets */
> -
> -			spin_lock_irq(&b->rb_lock);
> -
> -			/* Wake up all other completed waiters and select the
> -			 * next bottom-half for the next user interrupt.
> -			 */
> -			__intel_engine_remove_wait(engine,
> -						   &request->signaling.wait);
> -
> -			/* Find the next oldest signal. Note that as we have
> -			 * not been holding the lock, another client may
> -			 * have installed an even older signal than the one
> -			 * we just completed - so double check we are still
> -			 * the oldest before picking the next one.
> -			 */
> -			if (request == rcu_access_pointer(b->first_signal)) {
> -				struct rb_node *rb =
> -					rb_next(&request->signaling.node);
> -				rcu_assign_pointer(b->first_signal,
> -						   rb ? to_signaler(rb) : NULL);
> +			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +				      &request->fence.flags)) {
> +				local_bh_disable();
> +				dma_fence_signal(&request->fence);
> +				local_bh_enable(); /* kick start the tasklets */
>   			}
> -			rb_erase(&request->signaling.node, &b->signals);
> -			RB_CLEAR_NODE(&request->signaling.node);
> -
> -			spin_unlock_irq(&b->rb_lock);
>   
> -			i915_gem_request_put(request);
> +			if (request->signaling.wait.seqno) {
> +				spin_lock_irq(&b->rb_lock);
> +				__intel_engine_remove_signal(engine, request);
> +				spin_unlock_irq(&b->rb_lock);

This is safe against double invocation on a single request (race between 
retire and signaler) because of RB_EMPTY_NODE guard in 
__intel_engine_remove_wait?

> +			}
>   
>   			/* If the engine is saturated we may be continually
>   			 * processing completed requests. This angers the
> @@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
>   			 */
>   			do_schedule = need_resched();
>   		}
> +		i915_gem_request_put(request);
>   
>   		if (unlikely(do_schedule)) {
>   			if (kthread_should_park())
>   				kthread_parkme();
>   
> -			if (unlikely(kthread_should_stop())) {
> -				i915_gem_request_put(request);
> +			if (unlikely(kthread_should_stop()))
>   				break;
> -			}
>   
>   			schedule();
>   		}
> -		i915_gem_request_put(request);
>   	} while (1);
>   	__set_current_state(TASK_RUNNING);
>   
> @@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   	if (!seqno)
>   		return;
>   
> +	spin_lock(&b->rb_lock);
> +
> +	GEM_BUG_ON(request->signaling.wait.seqno);
>   	request->signaling.wait.tsk = b->signaler;
>   	request->signaling.wait.request = request;
>   	request->signaling.wait.seqno = seqno;
> -	i915_gem_request_get(request);
> -
> -	spin_lock(&b->rb_lock);
>   
>   	/* First add ourselves into the list of waiters, but register our
>   	 * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   			rcu_assign_pointer(b->first_signal, request);
>   	} else {
>   		__intel_engine_remove_wait(engine, &request->signaling.wait);
> -		i915_gem_request_put(request);
> +		request->signaling.wait.seqno = 0;
>   		wakeup = false;
>   	}
>   
> @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   
>   void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
>   {
> -	struct intel_engine_cs *engine = request->engine;
> -	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
>   	GEM_BUG_ON(!irqs_disabled());
>   	lockdep_assert_held(&request->lock);
> -	GEM_BUG_ON(!request->signaling.wait.seqno);
>   
> -	spin_lock(&b->rb_lock);
> +	if (request->signaling.wait.seqno) {

Do you need some special annotation like READ_ONCE on wait.seqno every 
time it is used for decisions like this one?

> +		struct intel_engine_cs *engine = request->engine;
> +		struct intel_breadcrumbs *b = &engine->breadcrumbs;
>   
> -	if (!RB_EMPTY_NODE(&request->signaling.node)) {
> -		if (request == rcu_access_pointer(b->first_signal)) {
> -			struct rb_node *rb =
> -				rb_next(&request->signaling.node);
> -			rcu_assign_pointer(b->first_signal,
> -					   rb ? to_signaler(rb) : NULL);
> -		}
> -		rb_erase(&request->signaling.node, &b->signals);
> -		RB_CLEAR_NODE(&request->signaling.node);
> -		i915_gem_request_put(request);
> +		spin_lock(&b->rb_lock);
> +		__intel_engine_remove_signal(engine, request);
> +		spin_unlock(&b->rb_lock);
>   	}
> -
> -	__intel_engine_remove_wait(engine, &request->signaling.wait);
> -
> -	spin_unlock(&b->rb_lock);
> -
> -	request->signaling.wait.seqno = 0;
>   }
>   
>   int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
> 

Not sure yet, because I dread adding more complex RCU rules. I still 
can't understand how submission can overtake interrupt processing and 
signaling. I would have guessed submission path is much more CPU heavy 
and also lower priority.

At minimum I need another pass over it. But anyway, just so you know I 
started looking at it.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-31 17:18 ` Tvrtko Ursulin
@ 2018-01-31 17:30   ` Chris Wilson
  2018-01-31 20:40     ` Chris Wilson
  2018-02-03 10:19   ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 17:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
> 
> On 22/01/2018 15:41, Chris Wilson wrote:
> > +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> > +                                      struct drm_i915_gem_request *request)
> > +{
> > +     struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > +
> > +     lockdep_assert_held(&b->rb_lock);
> > +
> > +     /*
> > +      * Wake up all other completed waiters and select the
> > +      * next bottom-half for the next user interrupt.
> > +      */
> > +     __intel_engine_remove_wait(engine, &request->signaling.wait);
> > +
> > +     /*
> > +      * Find the next oldest signal. Note that as we have
> > +      * not been holding the lock, another client may
> 
> Which lock, request-lock? b->first_signal seems to always be updated 
> under the b->rb_lock, so I am not sure of the 
> request->signaling.wait.seqno check.
> 
> wait.seqno is also updated without the request->lock below, so maybe you 
> were talking about the rb_lock after all?

What it means here is that since we took b->first_signal without
holding the rb_lock, by the time we get to here (and have taken the
rb_lock) b->first_signal may have changed and indeed someone else may
have already retired the request and canceled the signaling. Or inserted
a new signal into the tree. (iirc, this comment is unmodified.)
 
> How does wait.seqno becomes 0 outside the rb_lock? Is it due RCU 
> business this patch adds? Like it is retired before the signaler runs?

Not outside of the rb_lock, just before we notice.
 
> > +      * have installed an even older signal than the one
> > +      * we just completed - so double check we are still
> > +      * the oldest before picking the next one.
> > +      */
> > +     if (request->signaling.wait.seqno) {
> > +             if (request == rcu_access_pointer(b->first_signal)) {
> > +                     struct rb_node *rb = rb_next(&request->signaling.node);
> > +                     rcu_assign_pointer(b->first_signal,
> > +                                        rb ? to_signaler(rb) : NULL);
> > +             }
> > +
> > +             rb_erase(&request->signaling.node, &b->signals);
> > +             request->signaling.wait.seqno = 0;
> > +     }
> > +}
> > +
> > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > +{
> > +     /*
> > +      * See the big warnings for i915_gem_active_get_rcu() and similarly
> > +      * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > +      * here with defeating CPU/compiler speculation and enforcing
> > +      * the required memory barriers.
> > +      */
> > +     do {
> > +             struct drm_i915_gem_request *request;
> > +
> > +             request = rcu_dereference(b->first_signal);
> > +             if (request)
> > +                     request = i915_gem_request_get_rcu(request);
> > +
> > +             barrier();
> > +
> > +             if (!request || request == rcu_access_pointer(b->first_signal))
> > +                     return rcu_pointer_handoff(request);
> > +
> > +             i915_gem_request_put(request);
> > +     } while (1);
> > +}
> > +
> >   static int intel_breadcrumbs_signaler(void *arg)
> >   {
> >       struct intel_engine_cs *engine = arg;
> > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> >                * a new client.
> >                */
> >               rcu_read_lock();
> > -             request = rcu_dereference(b->first_signal);
> > -             if (request)
> > -                     request = i915_gem_request_get_rcu(request);
> > +             request = first_signal(b);
> 
> get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.

Sold.

> >               rcu_read_unlock();
> >               if (signal_complete(request)) {
> > -                     local_bh_disable();
> > -                     dma_fence_signal(&request->fence);
> > -                     local_bh_enable(); /* kick start the tasklets */
> > -
> > -                     spin_lock_irq(&b->rb_lock);
> > -
> > -                     /* Wake up all other completed waiters and select the
> > -                      * next bottom-half for the next user interrupt.
> > -                      */
> > -                     __intel_engine_remove_wait(engine,
> > -                                                &request->signaling.wait);
> > -
> > -                     /* Find the next oldest signal. Note that as we have
> > -                      * not been holding the lock, another client may
> > -                      * have installed an even older signal than the one
> > -                      * we just completed - so double check we are still
> > -                      * the oldest before picking the next one.
> > -                      */
> > -                     if (request == rcu_access_pointer(b->first_signal)) {
> > -                             struct rb_node *rb =
> > -                                     rb_next(&request->signaling.node);
> > -                             rcu_assign_pointer(b->first_signal,
> > -                                                rb ? to_signaler(rb) : NULL);
> > +                     if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > +                                   &request->fence.flags)) {
> > +                             local_bh_disable();
> > +                             dma_fence_signal(&request->fence);
> > +                             local_bh_enable(); /* kick start the tasklets */
> >                       }
> > -                     rb_erase(&request->signaling.node, &b->signals);
> > -                     RB_CLEAR_NODE(&request->signaling.node);
> > -
> > -                     spin_unlock_irq(&b->rb_lock);
> >   
> > -                     i915_gem_request_put(request);
> > +                     if (request->signaling.wait.seqno) {
> > +                             spin_lock_irq(&b->rb_lock);
> > +                             __intel_engine_remove_signal(engine, request);
> > +                             spin_unlock_irq(&b->rb_lock);
> 
> This is safe against double invocation on a single request (race between 
> retire and signaler) because of RB_EMPTY_NODE guard in 
> __intel_engine_remove_wait?

Yes, and wait.seqno==0 guard for the signaler tree.

> > @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >   
> >   void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> >   {
> > -     struct intel_engine_cs *engine = request->engine;
> > -     struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > -
> >       GEM_BUG_ON(!irqs_disabled());
> >       lockdep_assert_held(&request->lock);
> > -     GEM_BUG_ON(!request->signaling.wait.seqno);
> >   
> > -     spin_lock(&b->rb_lock);
> > +     if (request->signaling.wait.seqno) {
> 
> Do you need some special annotation like READ_ONCE on wait.seqno every 
> time it is used for decisions like this one?

For humans, yes ;)	
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-31 17:30   ` Chris Wilson
@ 2018-01-31 20:40     ` Chris Wilson
  2018-01-31 20:43       ` Chris Wilson
  0 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 20:40 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-01-31 17:30:46)
> Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
> > 
> > On 22/01/2018 15:41, Chris Wilson wrote:
> > > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > > +{
> > > +     /*
> > > +      * See the big warnings for i915_gem_active_get_rcu() and similarly
> > > +      * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > > +      * here with defeating CPU/compiler speculation and enforcing
> > > +      * the required memory barriers.
> > > +      */
> > > +     do {
> > > +             struct drm_i915_gem_request *request;
> > > +
> > > +             request = rcu_dereference(b->first_signal);
> > > +             if (request)
> > > +                     request = i915_gem_request_get_rcu(request);
> > > +
> > > +             barrier();
> > > +
> > > +             if (!request || request == rcu_access_pointer(b->first_signal))
> > > +                     return rcu_pointer_handoff(request);
> > > +
> > > +             i915_gem_request_put(request);
> > > +     } while (1);
> > > +}
> > > +
> > >   static int intel_breadcrumbs_signaler(void *arg)
> > >   {
> > >       struct intel_engine_cs *engine = arg;
> > > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > >                * a new client.
> > >                */
> > >               rcu_read_lock();
> > > -             request = rcu_dereference(b->first_signal);
> > > -             if (request)
> > > -                     request = i915_gem_request_get_rcu(request);
> > > +             request = first_signal(b);
> > 
> > get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.
> 
> Sold.

Whilst you are looking at this, I should just say that first_signal() is
what we should have been doing all this time; it's really a very obscure
bug fix. And fwiw, the s/rbtree/list/ patch eliminates it.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-31 20:40     ` Chris Wilson
@ 2018-01-31 20:43       ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 20:43 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-01-31 20:40:03)
> Quoting Chris Wilson (2018-01-31 17:30:46)
> > Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
> > > 
> > > On 22/01/2018 15:41, Chris Wilson wrote:
> > > > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > > > +{
> > > > +     /*
> > > > +      * See the big warnings for i915_gem_active_get_rcu() and similarly
> > > > +      * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > > > +      * here with defeating CPU/compiler speculation and enforcing
> > > > +      * the required memory barriers.
> > > > +      */
> > > > +     do {
> > > > +             struct drm_i915_gem_request *request;
> > > > +
> > > > +             request = rcu_dereference(b->first_signal);
> > > > +             if (request)
> > > > +                     request = i915_gem_request_get_rcu(request);
> > > > +
> > > > +             barrier();
> > > > +
> > > > +             if (!request || request == rcu_access_pointer(b->first_signal))
> > > > +                     return rcu_pointer_handoff(request);
> > > > +
> > > > +             i915_gem_request_put(request);
> > > > +     } while (1);
> > > > +}
> > > > +
> > > >   static int intel_breadcrumbs_signaler(void *arg)
> > > >   {
> > > >       struct intel_engine_cs *engine = arg;
> > > > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > > >                * a new client.
> > > >                */
> > > >               rcu_read_lock();
> > > > -             request = rcu_dereference(b->first_signal);
> > > > -             if (request)
> > > > -                     request = i915_gem_request_get_rcu(request);
> > > > +             request = first_signal(b);
> > > 
> > > get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.
> > 
> > Sold.
> 
> Whilst you are looking at this, I should just say that first_signal() is
> what we should have been doing all this time; it's really a very obscure
> bug fix. And fwiw, the s/rbtree/list/ patch eliminates it.

Hmm, actually no, no underlying bug here as the reference was previously
carried by the signaler, and with its removal do we need to do the
double dance. (It's a bad sign when I can remember having that exact
conversation with myself; and forgot until too late.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-01-31 17:18 ` Tvrtko Ursulin
  2018-01-31 17:30   ` Chris Wilson
@ 2018-02-03 10:19   ` Chris Wilson
  2018-02-05 13:23     ` Tvrtko Ursulin
  1 sibling, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-02-03 10:19 UTC (permalink / raw)
  To: intel-gfx

If we remember to cancel the signaler on a request when retiring it
(after we know that the request has been signaled), we do not need to
carry an additional request in the signaler itself. This prevents an
issue whereby the signaler threads may be delayed and hold on to
thousands of request references, causing severe memory fragmentation and
premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
and frequent use of inter-engine fences).

v2: Rename first_signal(), document reads outside of locks.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 150 +++++++++++++++++--------------
 2 files changed, 86 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ff57211ee06..dc7cc2c55bc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -555,7 +555,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	spin_lock_irq(&request->lock);
 	if (request->waitboost)
 		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
-	dma_fence_signal_locked(&request->fence);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
+		dma_fence_signal_locked(&request->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_cancel_signaling(request);
 	spin_unlock_irq(&request->lock);
 
 	i915_priotree_fini(request->i915, &request->priotree);
@@ -864,6 +867,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	/* No zalloc, must clear what we need by hand */
 	req->global_seqno = 0;
+	req->signaling.wait.seqno = 0;
 	req->file_priv = NULL;
 	req->batch = NULL;
 	req->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 62b2a20bc24e..efbc627a2a25 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -224,7 +224,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	struct intel_wait *wait, *n;
 
 	if (!b->irq_armed)
-		goto wakeup_signaler;
+		return;
 
 	/*
 	 * We only disarm the irq when we are idle (all requests completed),
@@ -249,14 +249,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	b->waiters = RB_ROOT;
 
 	spin_unlock_irq(&b->rb_lock);
-
-	/*
-	 * The signaling thread may be asleep holding a reference to a request,
-	 * that had its signaling cancelled prior to being preempted. We need
-	 * to kick the signaler, just in case, to release any such reference.
-	 */
-wakeup_signaler:
-	wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -633,6 +625,63 @@ static void signaler_set_rtpriority(void)
 	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
+static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
+					 struct drm_i915_gem_request *request)
+{
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * Wake up all other completed waiters and select the
+	 * next bottom-half for the next user interrupt.
+	 */
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
+
+	/*
+	 * Find the next oldest signal. Note that as we have
+	 * not been holding the lock, another client may
+	 * have installed an even older signal than the one
+	 * we just completed - so double check we are still
+	 * the oldest before picking the next one.
+	 */
+	if (request->signaling.wait.seqno) {
+		if (request == rcu_access_pointer(b->first_signal)) {
+			struct rb_node *rb = rb_next(&request->signaling.node);
+			rcu_assign_pointer(b->first_signal,
+					   rb ? to_signaler(rb) : NULL);
+		}
+
+		rb_erase(&request->signaling.node, &b->signals);
+		request->signaling.wait.seqno = 0;
+	}
+}
+
+static struct drm_i915_gem_request *
+get_first_signal_rcu(struct intel_breadcrumbs *b)
+{
+	/*
+	 * See the big warnings for i915_gem_active_get_rcu() and similarly
+	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
+	 * here with defeating CPU/compiler speculation and enforcing
+	 * the required memory barriers.
+	 */
+	do {
+		struct drm_i915_gem_request *request;
+
+		request = rcu_dereference(b->first_signal);
+		if (request)
+			request = i915_gem_request_get_rcu(request);
+
+		barrier();
+
+		if (!request || request == rcu_access_pointer(b->first_signal))
+			return rcu_pointer_handoff(request);
+
+		i915_gem_request_put(request);
+	} while (1);
+}
+
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
@@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * a new client.
 		 */
 		rcu_read_lock();
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_gem_request_get_rcu(request);
+		request = get_first_signal_rcu(b);
 		rcu_read_unlock();
 		if (signal_complete(request)) {
-			local_bh_disable();
-			dma_fence_signal(&request->fence);
-			local_bh_enable(); /* kick start the tasklets */
-
-			spin_lock_irq(&b->rb_lock);
-
-			/* Wake up all other completed waiters and select the
-			 * next bottom-half for the next user interrupt.
-			 */
-			__intel_engine_remove_wait(engine,
-						   &request->signaling.wait);
-
-			/* Find the next oldest signal. Note that as we have
-			 * not been holding the lock, another client may
-			 * have installed an even older signal than the one
-			 * we just completed - so double check we are still
-			 * the oldest before picking the next one.
-			 */
-			if (request == rcu_access_pointer(b->first_signal)) {
-				struct rb_node *rb =
-					rb_next(&request->signaling.node);
-				rcu_assign_pointer(b->first_signal,
-						   rb ? to_signaler(rb) : NULL);
+			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				      &request->fence.flags)) {
+				local_bh_disable();
+				dma_fence_signal(&request->fence);
+				local_bh_enable(); /* kick start the tasklets */
 			}
-			rb_erase(&request->signaling.node, &b->signals);
-			RB_CLEAR_NODE(&request->signaling.node);
-
-			spin_unlock_irq(&b->rb_lock);
 
-			i915_gem_request_put(request);
+			if (READ_ONCE(request->signaling.wait.seqno)) {
+				spin_lock_irq(&b->rb_lock);
+				__intel_engine_remove_signal(engine, request);
+				spin_unlock_irq(&b->rb_lock);
+			}
 
 			/* If the engine is saturated we may be continually
 			 * processing completed requests. This angers the
@@ -701,19 +730,17 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			do_schedule = need_resched();
 		}
+		i915_gem_request_put(request);
 
 		if (unlikely(do_schedule)) {
 			if (kthread_should_park())
 				kthread_parkme();
 
-			if (unlikely(kthread_should_stop())) {
-				i915_gem_request_put(request);
+			if (unlikely(kthread_should_stop()))
 				break;
-			}
 
 			schedule();
 		}
-		i915_gem_request_put(request);
 	} while (1);
 	__set_current_state(TASK_RUNNING);
 
@@ -742,12 +769,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 	if (!seqno)
 		return;
 
+	spin_lock(&b->rb_lock);
+
+	GEM_BUG_ON(request->signaling.wait.seqno);
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
-	i915_gem_request_get(request);
-
-	spin_lock(&b->rb_lock);
 
 	/* First add ourselves into the list of waiters, but register our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
@@ -786,7 +813,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 			rcu_assign_pointer(b->first_signal, request);
 	} else {
 		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		i915_gem_request_put(request);
+		request->signaling.wait.seqno = 0;
 		wakeup = false;
 	}
 
@@ -798,32 +825,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
 
 void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
-	GEM_BUG_ON(!request->signaling.wait.seqno);
 
-	spin_lock(&b->rb_lock);
+	if (READ_ONCE(request->signaling.wait.seqno)) {
+		struct intel_engine_cs *engine = request->engine;
+		struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	if (!RB_EMPTY_NODE(&request->signaling.node)) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb =
-				rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-		rb_erase(&request->signaling.node, &b->signals);
-		RB_CLEAR_NODE(&request->signaling.node);
-		i915_gem_request_put(request);
+		spin_lock(&b->rb_lock);
+		__intel_engine_remove_signal(engine, request);
+		spin_unlock(&b->rb_lock);
 	}
-
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	spin_unlock(&b->rb_lock);
-
-	request->signaling.wait.seqno = 0;
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-- 
2.15.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (5 preceding siblings ...)
  2018-01-31 17:18 ` Tvrtko Ursulin
@ 2018-02-03 10:40 ` Patchwork
  2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
  7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-02-03 10:40 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
URL   : https://patchwork.freedesktop.org/series/36908/
State : success

== Summary ==

Series 36908v2 drm/i915/breadcrumbs: Drop request reference for the signaler thread
https://patchwork.freedesktop.org/api/1.0/series/36908/revisions/2/mbox/

Test gem_mmap_gtt:
        Subgroup basic-small-bo-tiledx:
                pass       -> FAIL       (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713

fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:421s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:425s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:370s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:481s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:280s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:476s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:487s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:465s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:463s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:568s
fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:410s
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:277s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:513s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:386s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:395s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:408s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:468s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:410s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:460s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:495s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:452s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:497s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:572s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:428s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:506s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:527s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:483s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:473s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:414s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:429s
fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:392s
Blacklisted hosts:
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:470s

2e76a2952923eba64c4f9baf461613bc42ee997a drm-tip: 2018y-02m-02d-20h-33m-12s UTC integration manifest
60b9a2f8d1e9 drm/i915/breadcrumbs: Drop request reference for the signaler thread

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7878/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* ✗ Fi.CI.IGT: failure for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
  2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
                   ` (6 preceding siblings ...)
  2018-02-03 10:40 ` ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2) Patchwork
@ 2018-02-03 11:32 ` Patchwork
  2018-02-05 16:09   ` Chris Wilson
  7 siblings, 1 reply; 21+ messages in thread
From: Patchwork @ 2018-02-03 11:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
URL   : https://patchwork.freedesktop.org/series/36908/
State : failure

== Summary ==

Test perf_pmu:
        Subgroup busy-double-start-vcs0:
                pass       -> INCOMPLETE (shard-apl)
Test kms_cursor_legacy:
        Subgroup flip-vs-cursor-atomic:
                fail       -> PASS       (shard-hsw) fdo#102670 +1
Test kms_flip:
        Subgroup flip-vs-absolute-wf_vblank-interruptible:
                pass       -> FAIL       (shard-hsw) fdo#100368
        Subgroup 2x-flip-vs-expired-vblank-interruptible:
                fail       -> PASS       (shard-hsw) fdo#102887
        Subgroup 2x-plain-flip-ts-check:
                fail       -> PASS       (shard-hsw)
Test kms_rotation_crc:
        Subgroup bad-tiling:
                pass       -> FAIL       (shard-apl) fdo#103925
Test gem_eio:
        Subgroup in-flight-contexts:
                dmesg-warn -> PASS       (shard-snb) fdo#104058
Test kms_draw_crc:
        Subgroup fill-fb:
                pass       -> SKIP       (shard-snb)

fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058

shard-apl        total:2795 pass:1724 dwarn:1   dfail:0   fail:20  skip:1049 time:12065s
shard-hsw        total:2836 pass:1732 dwarn:1   dfail:0   fail:12  skip:1090 time:11572s
shard-snb        total:2836 pass:1327 dwarn:1   dfail:0   fail:10  skip:1498 time:6385s
Blacklisted hosts:
shard-kbl        total:2836 pass:1870 dwarn:3   dfail:0   fail:22  skip:941 time:9372s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7878/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-02-03 10:19   ` [PATCH v2] " Chris Wilson
@ 2018-02-05 13:23     ` Tvrtko Ursulin
  2018-02-05 13:36       ` Chris Wilson
  0 siblings, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-02-05 13:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 03/02/2018 10:19, Chris Wilson wrote:
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
> 
> v2: Rename first_signal(), document reads outside of locks.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c  |   6 +-
>   drivers/gpu/drm/i915/intel_breadcrumbs.c | 150 +++++++++++++++++--------------
>   2 files changed, 86 insertions(+), 70 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 5ff57211ee06..dc7cc2c55bc7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -555,7 +555,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   	spin_lock_irq(&request->lock);
>   	if (request->waitboost)
>   		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
> -	dma_fence_signal_locked(&request->fence);
> +	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
> +		dma_fence_signal_locked(&request->fence);
> +	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> +		intel_engine_cancel_signaling(request);
>   	spin_unlock_irq(&request->lock);
>   
>   	i915_priotree_fini(request->i915, &request->priotree);
> @@ -864,6 +867,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>   
>   	/* No zalloc, must clear what we need by hand */
>   	req->global_seqno = 0;
> +	req->signaling.wait.seqno = 0;
>   	req->file_priv = NULL;
>   	req->batch = NULL;
>   	req->capture_list = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 62b2a20bc24e..efbc627a2a25 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -224,7 +224,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>   	struct intel_wait *wait, *n;
>   
>   	if (!b->irq_armed)
> -		goto wakeup_signaler;
> +		return;
>   
>   	/*
>   	 * We only disarm the irq when we are idle (all requests completed),
> @@ -249,14 +249,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>   	b->waiters = RB_ROOT;
>   
>   	spin_unlock_irq(&b->rb_lock);
> -
> -	/*
> -	 * The signaling thread may be asleep holding a reference to a request,
> -	 * that had its signaling cancelled prior to being preempted. We need
> -	 * to kick the signaler, just in case, to release any such reference.
> -	 */
> -wakeup_signaler:
> -	wake_up_process(b->signaler);
>   }
>   
>   static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -633,6 +625,63 @@ static void signaler_set_rtpriority(void)
>   	 sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
>   }
>   
> +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> +					 struct drm_i915_gem_request *request)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	lockdep_assert_held(&b->rb_lock);
> +
> +	/*
> +	 * Wake up all other completed waiters and select the
> +	 * next bottom-half for the next user interrupt.
> +	 */
> +	__intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> +	/*
> +	 * Find the next oldest signal. Note that as we have
> +	 * not been holding the lock, another client may
> +	 * have installed an even older signal than the one
> +	 * we just completed - so double check we are still
> +	 * the oldest before picking the next one.
> +	 */
> +	if (request->signaling.wait.seqno) {
> +		if (request == rcu_access_pointer(b->first_signal)) {
> +			struct rb_node *rb = rb_next(&request->signaling.node);
> +			rcu_assign_pointer(b->first_signal,
> +					   rb ? to_signaler(rb) : NULL);
> +		}
> +
> +		rb_erase(&request->signaling.node, &b->signals);
> +		request->signaling.wait.seqno = 0;
> +	}
> +}
> +
> +static struct drm_i915_gem_request *
> +get_first_signal_rcu(struct intel_breadcrumbs *b)
> +{
> +	/*
> +	 * See the big warnings for i915_gem_active_get_rcu() and similarly
> +	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
> +	 * here with defeating CPU/compiler speculation and enforcing
> +	 * the required memory barriers.
> +	 */
> +	do {
> +		struct drm_i915_gem_request *request;
> +
> +		request = rcu_dereference(b->first_signal);
> +		if (request)
> +			request = i915_gem_request_get_rcu(request);
> +
> +		barrier();
> +
> +		if (!request || request == rcu_access_pointer(b->first_signal))
> +			return rcu_pointer_handoff(request);
> +
> +		i915_gem_request_put(request);
> +	} while (1);
> +}
> +
>   static int intel_breadcrumbs_signaler(void *arg)
>   {
>   	struct intel_engine_cs *engine = arg;
> @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>   		 * a new client.
>   		 */
>   		rcu_read_lock();
> -		request = rcu_dereference(b->first_signal);
> -		if (request)
> -			request = i915_gem_request_get_rcu(request);
> +		request = get_first_signal_rcu(b);
>   		rcu_read_unlock();
>   		if (signal_complete(request)) {
> -			local_bh_disable();
> -			dma_fence_signal(&request->fence);
> -			local_bh_enable(); /* kick start the tasklets */
> -
> -			spin_lock_irq(&b->rb_lock);
> -
> -			/* Wake up all other completed waiters and select the
> -			 * next bottom-half for the next user interrupt.
> -			 */
> -			__intel_engine_remove_wait(engine,
> -						   &request->signaling.wait);
> -
> -			/* Find the next oldest signal. Note that as we have
> -			 * not been holding the lock, another client may
> -			 * have installed an even older signal than the one
> -			 * we just completed - so double check we are still
> -			 * the oldest before picking the next one.
> -			 */
> -			if (request == rcu_access_pointer(b->first_signal)) {
> -				struct rb_node *rb =
> -					rb_next(&request->signaling.node);
> -				rcu_assign_pointer(b->first_signal,
> -						   rb ? to_signaler(rb) : NULL);
> +			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> +				      &request->fence.flags)) {
> +				local_bh_disable();
> +				dma_fence_signal(&request->fence);
> +				local_bh_enable(); /* kick start the tasklets */
>   			}
> -			rb_erase(&request->signaling.node, &b->signals);
> -			RB_CLEAR_NODE(&request->signaling.node);
> -
> -			spin_unlock_irq(&b->rb_lock);
>   
> -			i915_gem_request_put(request);
> +			if (READ_ONCE(request->signaling.wait.seqno)) {
> +				spin_lock_irq(&b->rb_lock);
> +				__intel_engine_remove_signal(engine, request);
> +				spin_unlock_irq(&b->rb_lock);
> +			}

How would you view taking the request->lock over this block in the 
signaler and then being able to call simply 
intel_engine_cancel_signaling - ending up with very similar code as in 
i915_gem_request_retire?

Only difference would be the tasklet kicking, but maybe still it would 
be possible to consolidate the two in one helper.

__i915_gem_request_retire_signaling(req, bool kick_tasklets)
{
	if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
		dma_fence_signal_locked(...);
		if (kick_tasklets) {
			local_bh_disable();
			local_bh_enable();
		}
	}

	if (DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT)
		intel_engine_cancel_signaling(...);
}

i915_gem_request_retire_signaling(req)
{
	spin_lock_irq(req->lock);
	__i915_gem_request_retire_signaling(req, true);
	spin_unlock_irq(req->lock);
}

And call the former from request retire, and later from signaler?

Regards,

Tvrtko

>   
>   			/* If the engine is saturated we may be continually
>   			 * processing completed requests. This angers the
> @@ -701,19 +730,17 @@ static int intel_breadcrumbs_signaler(void *arg)
>   			 */
>   			do_schedule = need_resched();
>   		}
> +		i915_gem_request_put(request);
>   
>   		if (unlikely(do_schedule)) {
>   			if (kthread_should_park())
>   				kthread_parkme();
>   
> -			if (unlikely(kthread_should_stop())) {
> -				i915_gem_request_put(request);
> +			if (unlikely(kthread_should_stop()))
>   				break;
> -			}
>   
>   			schedule();
>   		}
> -		i915_gem_request_put(request);
>   	} while (1);
>   	__set_current_state(TASK_RUNNING);
>   
> @@ -742,12 +769,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   	if (!seqno)
>   		return;
>   
> +	spin_lock(&b->rb_lock);
> +
> +	GEM_BUG_ON(request->signaling.wait.seqno);
>   	request->signaling.wait.tsk = b->signaler;
>   	request->signaling.wait.request = request;
>   	request->signaling.wait.seqno = seqno;
> -	i915_gem_request_get(request);
> -
> -	spin_lock(&b->rb_lock);
>   
>   	/* First add ourselves into the list of waiters, but register our
>   	 * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -786,7 +813,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   			rcu_assign_pointer(b->first_signal, request);
>   	} else {
>   		__intel_engine_remove_wait(engine, &request->signaling.wait);
> -		i915_gem_request_put(request);
> +		request->signaling.wait.seqno = 0;
>   		wakeup = false;
>   	}
>   
> @@ -798,32 +825,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>   
>   void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
>   {
> -	struct intel_engine_cs *engine = request->engine;
> -	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
>   	GEM_BUG_ON(!irqs_disabled());
>   	lockdep_assert_held(&request->lock);
> -	GEM_BUG_ON(!request->signaling.wait.seqno);
>   
> -	spin_lock(&b->rb_lock);
> +	if (READ_ONCE(request->signaling.wait.seqno)) {
> +		struct intel_engine_cs *engine = request->engine;
> +		struct intel_breadcrumbs *b = &engine->breadcrumbs;
>   
> -	if (!RB_EMPTY_NODE(&request->signaling.node)) {
> -		if (request == rcu_access_pointer(b->first_signal)) {
> -			struct rb_node *rb =
> -				rb_next(&request->signaling.node);
> -			rcu_assign_pointer(b->first_signal,
> -					   rb ? to_signaler(rb) : NULL);
> -		}
> -		rb_erase(&request->signaling.node, &b->signals);
> -		RB_CLEAR_NODE(&request->signaling.node);
> -		i915_gem_request_put(request);
> +		spin_lock(&b->rb_lock);
> +		__intel_engine_remove_signal(engine, request);
> +		spin_unlock(&b->rb_lock);
>   	}
> -
> -	__intel_engine_remove_wait(engine, &request->signaling.wait);
> -
> -	spin_unlock(&b->rb_lock);
> -
> -	request->signaling.wait.seqno = 0;
>   }
>   
>   int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-02-05 13:23     ` Tvrtko Ursulin
@ 2018-02-05 13:36       ` Chris Wilson
  2018-02-05 13:39         ` Chris Wilson
  2018-02-05 13:45         ` Tvrtko Ursulin
  0 siblings, 2 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 13:36 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
> 
> On 03/02/2018 10:19, Chris Wilson wrote:
> > @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> >                * a new client.
> >                */
> >               rcu_read_lock();
> > -             request = rcu_dereference(b->first_signal);
> > -             if (request)
> > -                     request = i915_gem_request_get_rcu(request);
> > +             request = get_first_signal_rcu(b);
> >               rcu_read_unlock();
> >               if (signal_complete(request)) {
> > -                     local_bh_disable();
> > -                     dma_fence_signal(&request->fence);
> > -                     local_bh_enable(); /* kick start the tasklets */
> > -
> > -                     spin_lock_irq(&b->rb_lock);
> > -
> > -                     /* Wake up all other completed waiters and select the
> > -                      * next bottom-half for the next user interrupt.
> > -                      */
> > -                     __intel_engine_remove_wait(engine,
> > -                                                &request->signaling.wait);
> > -
> > -                     /* Find the next oldest signal. Note that as we have
> > -                      * not been holding the lock, another client may
> > -                      * have installed an even older signal than the one
> > -                      * we just completed - so double check we are still
> > -                      * the oldest before picking the next one.
> > -                      */
> > -                     if (request == rcu_access_pointer(b->first_signal)) {
> > -                             struct rb_node *rb =
> > -                                     rb_next(&request->signaling.node);
> > -                             rcu_assign_pointer(b->first_signal,
> > -                                                rb ? to_signaler(rb) : NULL);
> > +                     if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > +                                   &request->fence.flags)) {
> > +                             local_bh_disable();
> > +                             dma_fence_signal(&request->fence);
> > +                             local_bh_enable(); /* kick start the tasklets */
> >                       }
> > -                     rb_erase(&request->signaling.node, &b->signals);
> > -                     RB_CLEAR_NODE(&request->signaling.node);
> > -
> > -                     spin_unlock_irq(&b->rb_lock);
> >   
> > -                     i915_gem_request_put(request);
> > +                     if (READ_ONCE(request->signaling.wait.seqno)) {
> > +                             spin_lock_irq(&b->rb_lock);
> > +                             __intel_engine_remove_signal(engine, request);
> > +                             spin_unlock_irq(&b->rb_lock);
> > +                     }
> 
> How would you view taking the request->lock over this block in the 
> signaler and then being able to call simply 
> intel_engine_cancel_signaling - ending up with very similar code as in 
> i915_gem_request_retire?

I am not keen on the conflation here (maybe it's just a hatred of bool
parameters). But at first glance it's just the commonality of
remove_signal, which is already a common routine?

> Only difference would be the tasklet kicking, but maybe still it would 
> be possible to consolidate the two in one helper.
> 
> __i915_gem_request_retire_signaling(req, bool kick_tasklets)
> {
>         if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
>                 dma_fence_signal_locked(...);
>                 if (kick_tasklets) {
>                         local_bh_disable();
>                         local_bh_enable();
>                 }

We can't kick the tasklet inside a spinlock. Especially not a lockclass
as nested as request.lock :(
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-02-05 13:36       ` Chris Wilson
@ 2018-02-05 13:39         ` Chris Wilson
  2018-02-05 13:45         ` Tvrtko Ursulin
  1 sibling, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 13:39 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Chris Wilson (2018-02-05 13:36:36)
> Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
> > 
> > On 03/02/2018 10:19, Chris Wilson wrote:
> > > @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > >                * a new client.
> > >                */
> > >               rcu_read_lock();
> > > -             request = rcu_dereference(b->first_signal);
> > > -             if (request)
> > > -                     request = i915_gem_request_get_rcu(request);
> > > +             request = get_first_signal_rcu(b);
> > >               rcu_read_unlock();
> > >               if (signal_complete(request)) {
> > > -                     local_bh_disable();
> > > -                     dma_fence_signal(&request->fence);
> > > -                     local_bh_enable(); /* kick start the tasklets */
> > > -
> > > -                     spin_lock_irq(&b->rb_lock);
> > > -
> > > -                     /* Wake up all other completed waiters and select the
> > > -                      * next bottom-half for the next user interrupt.
> > > -                      */
> > > -                     __intel_engine_remove_wait(engine,
> > > -                                                &request->signaling.wait);
> > > -
> > > -                     /* Find the next oldest signal. Note that as we have
> > > -                      * not been holding the lock, another client may
> > > -                      * have installed an even older signal than the one
> > > -                      * we just completed - so double check we are still
> > > -                      * the oldest before picking the next one.
> > > -                      */
> > > -                     if (request == rcu_access_pointer(b->first_signal)) {
> > > -                             struct rb_node *rb =
> > > -                                     rb_next(&request->signaling.node);
> > > -                             rcu_assign_pointer(b->first_signal,
> > > -                                                rb ? to_signaler(rb) : NULL);
> > > +                     if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > > +                                   &request->fence.flags)) {
> > > +                             local_bh_disable();
> > > +                             dma_fence_signal(&request->fence);
> > > +                             local_bh_enable(); /* kick start the tasklets */
> > >                       }
> > > -                     rb_erase(&request->signaling.node, &b->signals);
> > > -                     RB_CLEAR_NODE(&request->signaling.node);
> > > -
> > > -                     spin_unlock_irq(&b->rb_lock);
> > >   
> > > -                     i915_gem_request_put(request);
> > > +                     if (READ_ONCE(request->signaling.wait.seqno)) {
> > > +                             spin_lock_irq(&b->rb_lock);
> > > +                             __intel_engine_remove_signal(engine, request);
> > > +                             spin_unlock_irq(&b->rb_lock);
> > > +                     }
> > 
> > How would you view taking the request->lock over this block in the 
> > signaler and then being able to call simply 
> > intel_engine_cancel_signaling - ending up with very similar code as in 
> > i915_gem_request_retire?
> 
> I am not keen on the conflation here (maybe it's just a hatred of bool
> parameters). But at first glance it's just the commonality of
> remove_signal, which is already a common routine?
> 
> > Only difference would be the tasklet kicking, but maybe still it would 
> > be possible to consolidate the two in one helper.
> > 
> > __i915_gem_request_retire_signaling(req, bool kick_tasklets)
> > {
> >         if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
> >                 dma_fence_signal_locked(...);
> >                 if (kick_tasklets) {
> >                         local_bh_disable();
> >                         local_bh_enable();
> >                 }
> 
> We can't kick the tasklet inside a spinlock. Especially not a lockclass
> as nested as request.lock :(

Also bear in mind this is just an intermediate step in my plans. :)


static int intel_breadcrumbs_signaler(void *arg)
{
        struct intel_engine_cs *engine = arg;
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
        struct drm_i915_gem_request *rq, *n;

        /* Install ourselves with high priority to reduce signalling latency */
        signaler_set_rtpriority();

        do {
                bool do_schedule = true;
                LIST_HEAD(list);
                u32 seqno;

                set_current_state(TASK_INTERRUPTIBLE);
                if (list_empty(&b->signals))
                        goto sleep;
                
                /*
                 * We are either woken up by the interrupt bottom-half,
                 * or by a client adding a new signaller. In both cases,
                 * the GPU seqno may have advanced beyond our oldest signal.
                 * If it has, propagate the signal, remove the waiter and
                 * check again with the next oldest signal. Otherwise we
                 * need to wait for a new interrupt from the GPU or for
                 * a new client.
                 */
                seqno = intel_engine_get_seqno(engine);

                spin_lock_irq(&b->rb_lock);
                list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
                        u32 this = rq->signaling.wait.seqno;

                        GEM_BUG_ON(!rq->signaling.wait.seqno);

                        if (!i915_seqno_passed(seqno, this))
                                break;

                        if (this == i915_gem_request_global_seqno(rq)) {
                                __intel_engine_remove_wait(engine,
                                                           &rq->signaling.wait);

                                rq->signaling.wait.seqno = 0;
                                __list_del_entry(&rq->signaling.link);

                                if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                                              &rq->fence.flags)) {
                                        list_add_tail(&rq->signaling.link,
                                                      &list);
                                        i915_gem_request_get(rq);
                                }
                        }
                }
                spin_unlock_irq(&b->rb_lock);

                if (!list_empty(&list)) {
                        local_bh_disable();
                        list_for_each_entry_safe(rq, n, &list, signaling.link) {
                                dma_fence_signal(&rq->fence);
                                i915_gem_request_put(rq);
                        }
                        local_bh_enable(); /* kick start the tasklets */
                        
                        /*
                         * If the engine is saturated we may be continually
                         * processing completed requests. This angers the
                         * NMI watchdog if we never let anything else
                         * have access to the CPU. Let's pretend to be nice
                         * and relinquish the CPU if we burn through the
                         * entire RT timeslice!
                         */
                        do_schedule = need_resched();
                }

                if (unlikely(do_schedule)) { 
                        if (current->state & TASK_NORMAL &&
                            !list_empty(&b->signals) &&
                            engine->irq_seqno_barrier &&
                            test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
                                               &engine->irq_posted)) {
                                engine->irq_seqno_barrier(engine);
                                intel_engine_wakeup(engine);
                        }

sleep:
                        if (kthread_should_park())
                                kthread_parkme();

                        if (unlikely(kthread_should_stop()))
                                break;
                 
                        schedule();
                }
        } while (1);
        __set_current_state(TASK_RUNNING);

        return 0;
}

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-02-05 13:36       ` Chris Wilson
  2018-02-05 13:39         ` Chris Wilson
@ 2018-02-05 13:45         ` Tvrtko Ursulin
  2018-02-05 16:11           ` Chris Wilson
  1 sibling, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-02-05 13:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 05/02/2018 13:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
>>
>> On 03/02/2018 10:19, Chris Wilson wrote:
>>> @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>>>                 * a new client.
>>>                 */
>>>                rcu_read_lock();
>>> -             request = rcu_dereference(b->first_signal);
>>> -             if (request)
>>> -                     request = i915_gem_request_get_rcu(request);
>>> +             request = get_first_signal_rcu(b);
>>>                rcu_read_unlock();
>>>                if (signal_complete(request)) {
>>> -                     local_bh_disable();
>>> -                     dma_fence_signal(&request->fence);
>>> -                     local_bh_enable(); /* kick start the tasklets */
>>> -
>>> -                     spin_lock_irq(&b->rb_lock);
>>> -
>>> -                     /* Wake up all other completed waiters and select the
>>> -                      * next bottom-half for the next user interrupt.
>>> -                      */
>>> -                     __intel_engine_remove_wait(engine,
>>> -                                                &request->signaling.wait);
>>> -
>>> -                     /* Find the next oldest signal. Note that as we have
>>> -                      * not been holding the lock, another client may
>>> -                      * have installed an even older signal than the one
>>> -                      * we just completed - so double check we are still
>>> -                      * the oldest before picking the next one.
>>> -                      */
>>> -                     if (request == rcu_access_pointer(b->first_signal)) {
>>> -                             struct rb_node *rb =
>>> -                                     rb_next(&request->signaling.node);
>>> -                             rcu_assign_pointer(b->first_signal,
>>> -                                                rb ? to_signaler(rb) : NULL);
>>> +                     if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>>> +                                   &request->fence.flags)) {
>>> +                             local_bh_disable();
>>> +                             dma_fence_signal(&request->fence);
>>> +                             local_bh_enable(); /* kick start the tasklets */
>>>                        }
>>> -                     rb_erase(&request->signaling.node, &b->signals);
>>> -                     RB_CLEAR_NODE(&request->signaling.node);
>>> -
>>> -                     spin_unlock_irq(&b->rb_lock);
>>>    
>>> -                     i915_gem_request_put(request);
>>> +                     if (READ_ONCE(request->signaling.wait.seqno)) {
>>> +                             spin_lock_irq(&b->rb_lock);
>>> +                             __intel_engine_remove_signal(engine, request);
>>> +                             spin_unlock_irq(&b->rb_lock);
>>> +                     }
>>
>> How would you view taking the request->lock over this block in the
>> signaler and then being able to call simply
>> intel_engine_cancel_signaling - ending up with very similar code as in
>> i915_gem_request_retire?
> 
> I am not keen on the conflation here (maybe it's just a hatred of bool
> parameters). But at first glance it's just the commonality of
> remove_signal, which is already a common routine?
> 
>> Only difference would be the tasklet kicking, but maybe still it would
>> be possible to consolidate the two in one helper.
>>
>> __i915_gem_request_retire_signaling(req, bool kick_tasklets)
>> {
>>          if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
>>                  dma_fence_signal_locked(...);
>>                  if (kick_tasklets) {
>>                          local_bh_disable();
>>                          local_bh_enable();
>>                  }
> 
> We can't kick the tasklet inside a spinlock. Especially not a lockclass
> as nested as request.lock :(

Yep bool is ugly. So maybe make the helper return status, so the caller 
can kick if fence was signaled? Or you would worry about losing a little 
bit of latency? That also is not ideal I agree.

Too bad, I would kind of like to consolidate if nothing to be 
symmetrical wrt req->lock usage.

Otherwise patch looks safe to me. At least I failed to find any problems.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: ✗ Fi.CI.IGT: failure for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
  2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-02-05 16:09   ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 16:09 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2018-02-03 11:32:01)
> == Series Details ==
> 
> Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
> URL   : https://patchwork.freedesktop.org/series/36908/
> State : failure
> 
> == Summary ==
> 
> Test perf_pmu:
>         Subgroup busy-double-start-vcs0:
>                 pass       -> INCOMPLETE (shard-apl)
> Test kms_cursor_legacy:
>         Subgroup flip-vs-cursor-atomic:
>                 fail       -> PASS       (shard-hsw) fdo#102670 +1
> Test kms_flip:
>         Subgroup flip-vs-absolute-wf_vblank-interruptible:
>                 pass       -> FAIL       (shard-hsw) fdo#100368
>         Subgroup 2x-flip-vs-expired-vblank-interruptible:
>                 fail       -> PASS       (shard-hsw) fdo#102887
>         Subgroup 2x-plain-flip-ts-check:
>                 fail       -> PASS       (shard-hsw)
> Test kms_rotation_crc:
>         Subgroup bad-tiling:
>                 pass       -> FAIL       (shard-apl) fdo#103925
> Test gem_eio:
>         Subgroup in-flight-contexts:
>                 dmesg-warn -> PASS       (shard-snb) fdo#104058
> Test kms_draw_crc:
>         Subgroup fill-fb:
>                 pass       -> SKIP       (shard-snb)
> 
> fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
> fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
> fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
> fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
> fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058
> 
> shard-apl        total:2795 pass:1724 dwarn:1   dfail:0   fail:20  skip:1049 time:12065s
> shard-hsw        total:2836 pass:1732 dwarn:1   dfail:0   fail:12  skip:1090 time:11572s
> shard-snb        total:2836 pass:1327 dwarn:1   dfail:0   fail:10  skip:1498 time:6385s
> Blacklisted hosts:
> shard-kbl        total:2836 pass:1870 dwarn:3   dfail:0   fail:22  skip:941 time:9372s
> 
> == Logs ==
> 
> For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7878/shards.html
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
  2018-02-05 13:45         ` Tvrtko Ursulin
@ 2018-02-05 16:11           ` Chris Wilson
  0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 16:11 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-02-05 13:45:16)
> 
> On 05/02/2018 13:36, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
> >> How would you view taking the request->lock over this block in the
> >> signaler and then being able to call simply
> >> intel_engine_cancel_signaling - ending up with very similar code as in
> >> i915_gem_request_retire?
> > 
> > I am not keen on the conflation here (maybe it's just a hatred of bool
> > parameters). But at first glance it's just the commonality of
> > remove_signal, which is already a common routine?
> > 
> >> Only difference would be the tasklet kicking, but maybe still it would
> >> be possible to consolidate the two in one helper.
> >>
> >> __i915_gem_request_retire_signaling(req, bool kick_tasklets)
> >> {
> >>          if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
> >>                  dma_fence_signal_locked(...);
> >>                  if (kick_tasklets) {
> >>                          local_bh_disable();
> >>                          local_bh_enable();
> >>                  }
> > 
> > We can't kick the tasklet inside a spinlock. Especially not a lockclass
> > as nested as request.lock :(
> 
> Yep bool is ugly. So maybe make the helper return status, so the caller 
> can kick if fence was signaled? Or you would worry about losing a little 
> bit of latency? That also is not ideal I agree.
> 
> Too bad, I would kind of like to consolidate if nothing to be 
> symmetrical wrt req->lock usage.
> 
> Otherwise patch looks safe to me. At least I failed to find any problems.
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Took the r-b and pushed because I wanted to see the back of this oom
fix. The next steps in this series are to try and lighten the
irq/signaler threads, so suggestions are most appreciated.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2018-02-05 16:11 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
2018-01-22 19:01 ` ✓ Fi.CI.BAT: success " Patchwork
2018-01-23  0:03 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
2018-01-24 14:44   ` Chris Wilson
2018-01-26  9:42     ` Chris Wilson
2018-01-31 10:02 ` Chris Wilson
2018-01-31 17:18 ` Tvrtko Ursulin
2018-01-31 17:30   ` Chris Wilson
2018-01-31 20:40     ` Chris Wilson
2018-01-31 20:43       ` Chris Wilson
2018-02-03 10:19   ` [PATCH v2] " Chris Wilson
2018-02-05 13:23     ` Tvrtko Ursulin
2018-02-05 13:36       ` Chris Wilson
2018-02-05 13:39         ` Chris Wilson
2018-02-05 13:45         ` Tvrtko Ursulin
2018-02-05 16:11           ` Chris Wilson
2018-02-03 10:40 ` ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2) Patchwork
2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-02-05 16:09   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.