* [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
@ 2018-01-22 15:41 Chris Wilson
2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
` (7 more replies)
0 siblings, 8 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-22 15:41 UTC (permalink / raw)
To: intel-gfx
If we remember to cancel the signaler on a request when retiring it
(after we know that the request has been signaled), we do not need to
carry an additional request in the signaler itself. This prevents an
issue whereby the signaler threads may be delayed and hold on to
thousands of request references, causing severe memory fragmentation and
premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
and frequent use of inter-engine fences).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 6 +-
drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
2 files changed, 85 insertions(+), 70 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0f451b4a4e8..e4eca6ba0d05 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
spin_lock_irq(&request->lock);
if (request->waitboost)
atomic_dec(&request->i915->gt_pm.rps.num_waiters);
- dma_fence_signal_locked(&request->fence);
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
+ dma_fence_signal_locked(&request->fence);
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+ intel_engine_cancel_signaling(request);
spin_unlock_irq(&request->lock);
i915_priotree_fini(request->i915, &request->priotree);
@@ -738,6 +741,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
/* No zalloc, must clear what we need by hand */
req->global_seqno = 0;
+ req->signaling.wait.seqno = 0;
req->file_priv = NULL;
req->batch = NULL;
req->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 86acac010bb8..e3667dc1e96d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
struct intel_wait *wait, *n;
if (!b->irq_armed)
- goto wakeup_signaler;
+ return;
/*
* We only disarm the irq when we are idle (all requests completed),
@@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
b->waiters = RB_ROOT;
spin_unlock_irq(&b->rb_lock);
-
- /*
- * The signaling thread may be asleep holding a reference to a request,
- * that had its signaling cancelled prior to being preempted. We need
- * to kick the signaler, just in case, to release any such reference.
- */
-wakeup_signaler:
- wake_up_process(b->signaler);
}
static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
}
+static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ lockdep_assert_held(&b->rb_lock);
+
+ /*
+ * Wake up all other completed waiters and select the
+ * next bottom-half for the next user interrupt.
+ */
+ __intel_engine_remove_wait(engine, &request->signaling.wait);
+
+ /*
+ * Find the next oldest signal. Note that as we have
+ * not been holding the lock, another client may
+ * have installed an even older signal than the one
+ * we just completed - so double check we are still
+ * the oldest before picking the next one.
+ */
+ if (request->signaling.wait.seqno) {
+ if (request == rcu_access_pointer(b->first_signal)) {
+ struct rb_node *rb = rb_next(&request->signaling.node);
+ rcu_assign_pointer(b->first_signal,
+ rb ? to_signaler(rb) : NULL);
+ }
+
+ rb_erase(&request->signaling.node, &b->signals);
+ request->signaling.wait.seqno = 0;
+ }
+}
+
+static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
+{
+ /*
+ * See the big warnings for i915_gem_active_get_rcu() and similarly
+ * for dma_fence_get_rcu_safe() that explain the intricacies involved
+ * here with defeating CPU/compiler speculation and enforcing
+ * the required memory barriers.
+ */
+ do {
+ struct drm_i915_gem_request *request;
+
+ request = rcu_dereference(b->first_signal);
+ if (request)
+ request = i915_gem_request_get_rcu(request);
+
+ barrier();
+
+ if (!request || request == rcu_access_pointer(b->first_signal))
+ return rcu_pointer_handoff(request);
+
+ i915_gem_request_put(request);
+ } while (1);
+}
+
static int intel_breadcrumbs_signaler(void *arg)
{
struct intel_engine_cs *engine = arg;
@@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
* a new client.
*/
rcu_read_lock();
- request = rcu_dereference(b->first_signal);
- if (request)
- request = i915_gem_request_get_rcu(request);
+ request = first_signal(b);
rcu_read_unlock();
if (signal_complete(request)) {
- local_bh_disable();
- dma_fence_signal(&request->fence);
- local_bh_enable(); /* kick start the tasklets */
-
- spin_lock_irq(&b->rb_lock);
-
- /* Wake up all other completed waiters and select the
- * next bottom-half for the next user interrupt.
- */
- __intel_engine_remove_wait(engine,
- &request->signaling.wait);
-
- /* Find the next oldest signal. Note that as we have
- * not been holding the lock, another client may
- * have installed an even older signal than the one
- * we just completed - so double check we are still
- * the oldest before picking the next one.
- */
- if (request == rcu_access_pointer(b->first_signal)) {
- struct rb_node *rb =
- rb_next(&request->signaling.node);
- rcu_assign_pointer(b->first_signal,
- rb ? to_signaler(rb) : NULL);
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags)) {
+ local_bh_disable();
+ dma_fence_signal(&request->fence);
+ local_bh_enable(); /* kick start the tasklets */
}
- rb_erase(&request->signaling.node, &b->signals);
- RB_CLEAR_NODE(&request->signaling.node);
-
- spin_unlock_irq(&b->rb_lock);
- i915_gem_request_put(request);
+ if (request->signaling.wait.seqno) {
+ spin_lock_irq(&b->rb_lock);
+ __intel_engine_remove_signal(engine, request);
+ spin_unlock_irq(&b->rb_lock);
+ }
/* If the engine is saturated we may be continually
* processing completed requests. This angers the
@@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
*/
do_schedule = need_resched();
}
+ i915_gem_request_put(request);
if (unlikely(do_schedule)) {
if (kthread_should_park())
kthread_parkme();
- if (unlikely(kthread_should_stop())) {
- i915_gem_request_put(request);
+ if (unlikely(kthread_should_stop()))
break;
- }
schedule();
}
- i915_gem_request_put(request);
} while (1);
__set_current_state(TASK_RUNNING);
@@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
if (!seqno)
return;
+ spin_lock(&b->rb_lock);
+
+ GEM_BUG_ON(request->signaling.wait.seqno);
request->signaling.wait.tsk = b->signaler;
request->signaling.wait.request = request;
request->signaling.wait.seqno = seqno;
- i915_gem_request_get(request);
-
- spin_lock(&b->rb_lock);
/* First add ourselves into the list of waiters, but register our
* bottom-half as the signaller thread. As per usual, only the oldest
@@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
rcu_assign_pointer(b->first_signal, request);
} else {
__intel_engine_remove_wait(engine, &request->signaling.wait);
- i915_gem_request_put(request);
+ request->signaling.wait.seqno = 0;
wakeup = false;
}
@@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
{
- struct intel_engine_cs *engine = request->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock);
- GEM_BUG_ON(!request->signaling.wait.seqno);
- spin_lock(&b->rb_lock);
+ if (request->signaling.wait.seqno) {
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
- if (!RB_EMPTY_NODE(&request->signaling.node)) {
- if (request == rcu_access_pointer(b->first_signal)) {
- struct rb_node *rb =
- rb_next(&request->signaling.node);
- rcu_assign_pointer(b->first_signal,
- rb ? to_signaler(rb) : NULL);
- }
- rb_erase(&request->signaling.node, &b->signals);
- RB_CLEAR_NODE(&request->signaling.node);
- i915_gem_request_put(request);
+ spin_lock(&b->rb_lock);
+ __intel_engine_remove_signal(engine, request);
+ spin_unlock(&b->rb_lock);
}
-
- __intel_engine_remove_wait(engine, &request->signaling.wait);
-
- spin_unlock(&b->rb_lock);
-
- request->signaling.wait.seqno = 0;
}
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 21+ messages in thread
* ✗ Fi.CI.BAT: warning for drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
@ 2018-01-22 16:07 ` Patchwork
2018-01-22 19:01 ` ✓ Fi.CI.BAT: success " Patchwork
` (6 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-01-22 16:07 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread
URL : https://patchwork.freedesktop.org/series/36908/
State : warning
== Summary ==
Series 36908v1 drm/i915/breadcrumbs: Drop request reference for the signaler thread
https://patchwork.freedesktop.org/api/1.0/series/36908/revisions/1/mbox/
Test debugfs_test:
Subgroup read_all_entries:
fail -> DMESG-WARN (fi-elk-e7500) fdo#103989 +1
Test gem_exec_suspend:
Subgroup basic-s4-devices:
pass -> DMESG-WARN (fi-kbl-7500u)
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
pass -> INCOMPLETE (fi-snb-2520m) fdo#103713
fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:428s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:371s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:488s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:281s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:482s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:483s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:466s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:456s
fi-elk-e7500 total:224 pass:168 dwarn:10 dfail:0 fail:0 skip:45
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:279s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:513s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:398s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:399s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:412s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:460s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:416s
fi-kbl-7500u total:288 pass:262 dwarn:2 dfail:0 fail:0 skip:24 time:459s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:497s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:453s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:504s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:572s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:431s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:510s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:529s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:486s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:492s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:416s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:436s
fi-snb-2520m total:245 pass:211 dwarn:0 dfail:0 fail:0 skip:33
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:399s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:568s
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:470s
fi-bdw-gvtdvm failed to collect. IGT log at Patchwork_7740/fi-bdw-gvtdvm/igt.log
0edb3ea337349387863e60ed0bdf53b9b2767cf4 drm-tip: 2018y-01m-22d-14h-27m-52s UTC integration manifest
c6f6fdb0d9c0 drm/i915/breadcrumbs: Drop request reference for the signaler thread
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7740/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
@ 2018-01-22 19:01 ` Patchwork
2018-01-23 0:03 ` ✗ Fi.CI.IGT: failure " Patchwork
` (5 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-01-22 19:01 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread
URL : https://patchwork.freedesktop.org/series/36908/
State : success
== Summary ==
Series 36908v1 drm/i915/breadcrumbs: Drop request reference for the signaler thread
https://patchwork.freedesktop.org/api/1.0/series/36908/revisions/1/mbox/
Test debugfs_test:
Subgroup read_all_entries:
dmesg-fail -> DMESG-WARN (fi-elk-e7500) fdo#103989
Test gem_mmap_gtt:
Subgroup basic-small-bo-tiledx:
fail -> PASS (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
incomplete -> PASS (fi-snb-2520m) fdo#103713
fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:420s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:427s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:374s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:489s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:280s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:482s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:482s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:468s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:455s
fi-elk-e7500 total:224 pass:168 dwarn:10 dfail:0 fail:0 skip:45
fi-gdg-551 total:288 pass:180 dwarn:0 dfail:0 fail:0 skip:108 time:280s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:517s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:392s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:399s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:412s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:466s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:416s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:460s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:494s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:454s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:501s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:585s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:434s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:515s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:530s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:488s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:491s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:422s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:430s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:516s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:398s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:569s
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:472s
06c8efda323ac918fad0e26d81e8884574ec8b84 drm-tip: 2018y-01m-22d-17h-43m-26s UTC integration manifest
a40183e96a23 drm/i915/breadcrumbs: Drop request reference for the signaler thread
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7743/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* ✗ Fi.CI.IGT: failure for drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
2018-01-22 19:01 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-01-23 0:03 ` Patchwork
2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
` (4 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-01-23 0:03 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread
URL : https://patchwork.freedesktop.org/series/36908/
State : failure
== Summary ==
Test kms_cursor_crc:
Subgroup cursor-256x256-suspend:
incomplete -> PASS (shard-hsw) fdo#103375 +1
Test kms_flip:
Subgroup 2x-flip-vs-expired-vblank:
fail -> PASS (shard-hsw)
Subgroup vblank-vs-suspend:
pass -> INCOMPLETE (shard-hsw) fdo#100368
Test perf:
Subgroup polling:
fail -> PASS (shard-hsw) fdo#102252
Test gem_eio:
Subgroup in-flight-external:
fail -> PASS (shard-hsw) fdo#104676
Test kms_frontbuffer_tracking:
Subgroup fbc-1p-primscrn-cur-indfb-move:
pass -> SKIP (shard-snb) fdo#101623 +1
Test perf_pmu:
Subgroup multi-client-vcs0:
pass -> FAIL (shard-apl)
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#104676 https://bugs.freedesktop.org/show_bug.cgi?id=104676
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
shard-apl total:2753 pass:1713 dwarn:1 dfail:0 fail:26 skip:1013 time:13941s
shard-hsw total:2721 pass:1706 dwarn:1 dfail:0 fail:10 skip:1002 time:14634s
shard-snb total:2753 pass:1317 dwarn:1 dfail:0 fail:10 skip:1425 time:7887s
Blacklisted hosts:
shard-kbl total:2753 pass:1812 dwarn:24 dfail:2 fail:25 skip:890 time:10845s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7740/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
` (2 preceding siblings ...)
2018-01-23 0:03 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-01-24 13:09 ` Tvrtko Ursulin
2018-01-24 14:44 ` Chris Wilson
2018-01-31 10:02 ` Chris Wilson
` (3 subsequent siblings)
7 siblings, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 13:09 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 22/01/2018 15:41, Chris Wilson wrote:
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
What is starving the signaler thread, which is set to SCHED_FIFO, and
can't be tasklets on SNB?
Before I actually start revieweing the code, which I'd rather avoid :) :
Is it just not able to process enough requests in it's time-slice
(need_resched) so is falling behind? It would be surprising since I
would expect it to be much lighter wait processing there, per request,
than on the submission paths.
Regards,
Tvrtko
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 6 +-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
> 2 files changed, 85 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..e4eca6ba0d05 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> spin_lock_irq(&request->lock);
> if (request->waitboost)
> atomic_dec(&request->i915->gt_pm.rps.num_waiters);
> - dma_fence_signal_locked(&request->fence);
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
> + dma_fence_signal_locked(&request->fence);
> + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> + intel_engine_cancel_signaling(request);
> spin_unlock_irq(&request->lock);
>
> i915_priotree_fini(request->i915, &request->priotree);
> @@ -738,6 +741,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
> /* No zalloc, must clear what we need by hand */
> req->global_seqno = 0;
> + req->signaling.wait.seqno = 0;
> req->file_priv = NULL;
> req->batch = NULL;
> req->capture_list = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 86acac010bb8..e3667dc1e96d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> struct intel_wait *wait, *n;
>
> if (!b->irq_armed)
> - goto wakeup_signaler;
> + return;
>
> /*
> * We only disarm the irq when we are idle (all requests completed),
> @@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> b->waiters = RB_ROOT;
>
> spin_unlock_irq(&b->rb_lock);
> -
> - /*
> - * The signaling thread may be asleep holding a reference to a request,
> - * that had its signaling cancelled prior to being preempted. We need
> - * to kick the signaler, just in case, to release any such reference.
> - */
> -wakeup_signaler:
> - wake_up_process(b->signaler);
> }
>
> static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
> sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
> }
>
> +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> + struct drm_i915_gem_request *request)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + lockdep_assert_held(&b->rb_lock);
> +
> + /*
> + * Wake up all other completed waiters and select the
> + * next bottom-half for the next user interrupt.
> + */
> + __intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> + /*
> + * Find the next oldest signal. Note that as we have
> + * not been holding the lock, another client may
> + * have installed an even older signal than the one
> + * we just completed - so double check we are still
> + * the oldest before picking the next one.
> + */
> + if (request->signaling.wait.seqno) {
> + if (request == rcu_access_pointer(b->first_signal)) {
> + struct rb_node *rb = rb_next(&request->signaling.node);
> + rcu_assign_pointer(b->first_signal,
> + rb ? to_signaler(rb) : NULL);
> + }
> +
> + rb_erase(&request->signaling.node, &b->signals);
> + request->signaling.wait.seqno = 0;
> + }
> +}
> +
> +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> +{
> + /*
> + * See the big warnings for i915_gem_active_get_rcu() and similarly
> + * for dma_fence_get_rcu_safe() that explain the intricacies involved
> + * here with defeating CPU/compiler speculation and enforcing
> + * the required memory barriers.
> + */
> + do {
> + struct drm_i915_gem_request *request;
> +
> + request = rcu_dereference(b->first_signal);
> + if (request)
> + request = i915_gem_request_get_rcu(request);
> +
> + barrier();
> +
> + if (!request || request == rcu_access_pointer(b->first_signal))
> + return rcu_pointer_handoff(request);
> +
> + i915_gem_request_put(request);
> + } while (1);
> +}
> +
> static int intel_breadcrumbs_signaler(void *arg)
> {
> struct intel_engine_cs *engine = arg;
> @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> * a new client.
> */
> rcu_read_lock();
> - request = rcu_dereference(b->first_signal);
> - if (request)
> - request = i915_gem_request_get_rcu(request);
> + request = first_signal(b);
> rcu_read_unlock();
> if (signal_complete(request)) {
> - local_bh_disable();
> - dma_fence_signal(&request->fence);
> - local_bh_enable(); /* kick start the tasklets */
> -
> - spin_lock_irq(&b->rb_lock);
> -
> - /* Wake up all other completed waiters and select the
> - * next bottom-half for the next user interrupt.
> - */
> - __intel_engine_remove_wait(engine,
> - &request->signaling.wait);
> -
> - /* Find the next oldest signal. Note that as we have
> - * not been holding the lock, another client may
> - * have installed an even older signal than the one
> - * we just completed - so double check we are still
> - * the oldest before picking the next one.
> - */
> - if (request == rcu_access_pointer(b->first_signal)) {
> - struct rb_node *rb =
> - rb_next(&request->signaling.node);
> - rcu_assign_pointer(b->first_signal,
> - rb ? to_signaler(rb) : NULL);
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> + &request->fence.flags)) {
> + local_bh_disable();
> + dma_fence_signal(&request->fence);
> + local_bh_enable(); /* kick start the tasklets */
> }
> - rb_erase(&request->signaling.node, &b->signals);
> - RB_CLEAR_NODE(&request->signaling.node);
> -
> - spin_unlock_irq(&b->rb_lock);
>
> - i915_gem_request_put(request);
> + if (request->signaling.wait.seqno) {
> + spin_lock_irq(&b->rb_lock);
> + __intel_engine_remove_signal(engine, request);
> + spin_unlock_irq(&b->rb_lock);
> + }
>
> /* If the engine is saturated we may be continually
> * processing completed requests. This angers the
> @@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
> */
> do_schedule = need_resched();
> }
> + i915_gem_request_put(request);
>
> if (unlikely(do_schedule)) {
> if (kthread_should_park())
> kthread_parkme();
>
> - if (unlikely(kthread_should_stop())) {
> - i915_gem_request_put(request);
> + if (unlikely(kthread_should_stop()))
> break;
> - }
>
> schedule();
> }
> - i915_gem_request_put(request);
> } while (1);
> __set_current_state(TASK_RUNNING);
>
> @@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> if (!seqno)
> return;
>
> + spin_lock(&b->rb_lock);
> +
> + GEM_BUG_ON(request->signaling.wait.seqno);
> request->signaling.wait.tsk = b->signaler;
> request->signaling.wait.request = request;
> request->signaling.wait.seqno = seqno;
> - i915_gem_request_get(request);
> -
> - spin_lock(&b->rb_lock);
>
> /* First add ourselves into the list of waiters, but register our
> * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> rcu_assign_pointer(b->first_signal, request);
> } else {
> __intel_engine_remove_wait(engine, &request->signaling.wait);
> - i915_gem_request_put(request);
> + request->signaling.wait.seqno = 0;
> wakeup = false;
> }
>
> @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>
> void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> {
> - struct intel_engine_cs *engine = request->engine;
> - struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> GEM_BUG_ON(!irqs_disabled());
> lockdep_assert_held(&request->lock);
> - GEM_BUG_ON(!request->signaling.wait.seqno);
>
> - spin_lock(&b->rb_lock);
> + if (request->signaling.wait.seqno) {
> + struct intel_engine_cs *engine = request->engine;
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> - if (!RB_EMPTY_NODE(&request->signaling.node)) {
> - if (request == rcu_access_pointer(b->first_signal)) {
> - struct rb_node *rb =
> - rb_next(&request->signaling.node);
> - rcu_assign_pointer(b->first_signal,
> - rb ? to_signaler(rb) : NULL);
> - }
> - rb_erase(&request->signaling.node, &b->signals);
> - RB_CLEAR_NODE(&request->signaling.node);
> - i915_gem_request_put(request);
> + spin_lock(&b->rb_lock);
> + __intel_engine_remove_signal(engine, request);
> + spin_unlock(&b->rb_lock);
> }
> -
> - __intel_engine_remove_wait(engine, &request->signaling.wait);
> -
> - spin_unlock(&b->rb_lock);
> -
> - request->signaling.wait.seqno = 0;
> }
>
> int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
@ 2018-01-24 14:44 ` Chris Wilson
2018-01-26 9:42 ` Chris Wilson
0 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-01-24 14:44 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Tvrtko Ursulin (2018-01-24 13:09:37)
>
> On 22/01/2018 15:41, Chris Wilson wrote:
> > If we remember to cancel the signaler on a request when retiring it
> > (after we know that the request has been signaled), we do not need to
> > carry an additional request in the signaler itself. This prevents an
> > issue whereby the signaler threads may be delayed and hold on to
> > thousands of request references, causing severe memory fragmentation and
> > premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> > and frequent use of inter-engine fences).
>
> What is starving the signaler thread, which is set to SCHED_FIFO, and
> can't be tasklets on SNB?
Interrupts. MI_USER_INTERRUPT to be precise, but we have to check all
the other sources on snb as well.
> Before I actually start revieweing the code, which I'd rather avoid :) :
>
> Is it just not able to process enough requests in it's time-slice
> (need_resched) so is falling behind? It would be surprising since I
> would expect it to be much lighter wait processing there, per request,
> than on the submission paths.
The conclusion is a bit odd, but more or less it's just a pathological
case where interrupts + rt task are contending for one cpu with
submission proceeding on another. Making the signaler lighter was the
intention of the rest of the series, but this patch by itself prevents
the runaway references.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-24 14:44 ` Chris Wilson
@ 2018-01-26 9:42 ` Chris Wilson
0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-26 9:42 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Chris Wilson (2018-01-24 14:44:01)
> Quoting Tvrtko Ursulin (2018-01-24 13:09:37)
> >
> > On 22/01/2018 15:41, Chris Wilson wrote:
> > > If we remember to cancel the signaler on a request when retiring it
> > > (after we know that the request has been signaled), we do not need to
> > > carry an additional request in the signaler itself. This prevents an
> > > issue whereby the signaler threads may be delayed and hold on to
> > > thousands of request references, causing severe memory fragmentation and
> > > premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> > > and frequent use of inter-engine fences).
> >
> > What is starving the signaler thread, which is set to SCHED_FIFO, and
> > can't be tasklets on SNB?
>
> Interrupts. MI_USER_INTERRUPT to be precise, but we have to check all
> the other sources on snb as well.
>
> > Before I actually start revieweing the code, which I'd rather avoid :) :
> >
> > Is it just not able to process enough requests in it's time-slice
> > (need_resched) so is falling behind? It would be surprising since I
> > would expect it to be much lighter wait processing there, per request,
> > than on the submission paths.
>
> The conclusion is a bit odd, but more or less it's just a pathological
> case where interrupts + rt task are contending for one cpu with
> submission proceeding on another. Making the signaler lighter was the
> intention of the rest of the series, but this patch by itself prevents
> the runaway references.
Whilst I'm thinking of this, when I hit oom on snb, there were ~3
million requests allocated (/proc/slabinfo) but only ~3 in-flight.
Tracing the request references gave the clue that the only outstanding
ones were in the signaler (there were only 2 sources of references, one
for the active request and one for the signaler; and we accounted for the
active request knowing that they were being retired).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
` (3 preceding siblings ...)
2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
@ 2018-01-31 10:02 ` Chris Wilson
2018-01-31 17:18 ` Tvrtko Ursulin
` (2 subsequent siblings)
7 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 10:02 UTC (permalink / raw)
To: intel-gfx
Quoting Chris Wilson (2018-01-22 15:41:06)
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Poke. I've been staring at this problem since December, it would be nice
to get some resolution :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
` (4 preceding siblings ...)
2018-01-31 10:02 ` Chris Wilson
@ 2018-01-31 17:18 ` Tvrtko Ursulin
2018-01-31 17:30 ` Chris Wilson
2018-02-03 10:19 ` [PATCH v2] " Chris Wilson
2018-02-03 10:40 ` ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2) Patchwork
2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
7 siblings, 2 replies; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-01-31 17:18 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 22/01/2018 15:41, Chris Wilson wrote:
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 6 +-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 149 +++++++++++++++++--------------
> 2 files changed, 85 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..e4eca6ba0d05 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -441,7 +441,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> spin_lock_irq(&request->lock);
> if (request->waitboost)
> atomic_dec(&request->i915->gt_pm.rps.num_waiters);
> - dma_fence_signal_locked(&request->fence);
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
> + dma_fence_signal_locked(&request->fence);
> + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> + intel_engine_cancel_signaling(request);
> spin_unlock_irq(&request->lock);
>
> i915_priotree_fini(request->i915, &request->priotree);
> @@ -738,6 +741,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
> /* No zalloc, must clear what we need by hand */
> req->global_seqno = 0;
> + req->signaling.wait.seqno = 0;
> req->file_priv = NULL;
> req->batch = NULL;
> req->capture_list = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 86acac010bb8..e3667dc1e96d 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -235,7 +235,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> struct intel_wait *wait, *n;
>
> if (!b->irq_armed)
> - goto wakeup_signaler;
> + return;
>
> /*
> * We only disarm the irq when we are idle (all requests completed),
> @@ -260,14 +260,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> b->waiters = RB_ROOT;
>
> spin_unlock_irq(&b->rb_lock);
> -
> - /*
> - * The signaling thread may be asleep holding a reference to a request,
> - * that had its signaling cancelled prior to being preempted. We need
> - * to kick the signaler, just in case, to release any such reference.
> - */
> -wakeup_signaler:
> - wake_up_process(b->signaler);
> }
>
> static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -644,6 +636,62 @@ static void signaler_set_rtpriority(void)
> sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
> }
>
> +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> + struct drm_i915_gem_request *request)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + lockdep_assert_held(&b->rb_lock);
> +
> + /*
> + * Wake up all other completed waiters and select the
> + * next bottom-half for the next user interrupt.
> + */
> + __intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> + /*
> + * Find the next oldest signal. Note that as we have
> + * not been holding the lock, another client may
Which lock, request-lock? b->first_signal seems to always be updated
under the b->rb_lock, so I am not sure of the
request->signaling.wait.seqno check.
wait.seqno is also updated without the request->lock below, so maybe you
were talking about the rb_lock after all?
How does wait.seqno becomes 0 outside the rb_lock? Is it due RCU
business this patch adds? Like it is retired before the signaler runs?
> + * have installed an even older signal than the one
> + * we just completed - so double check we are still
> + * the oldest before picking the next one.
> + */
> + if (request->signaling.wait.seqno) {
> + if (request == rcu_access_pointer(b->first_signal)) {
> + struct rb_node *rb = rb_next(&request->signaling.node);
> + rcu_assign_pointer(b->first_signal,
> + rb ? to_signaler(rb) : NULL);
> + }
> +
> + rb_erase(&request->signaling.node, &b->signals);
> + request->signaling.wait.seqno = 0;
> + }
> +}
> +
> +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> +{
> + /*
> + * See the big warnings for i915_gem_active_get_rcu() and similarly
> + * for dma_fence_get_rcu_safe() that explain the intricacies involved
> + * here with defeating CPU/compiler speculation and enforcing
> + * the required memory barriers.
> + */
> + do {
> + struct drm_i915_gem_request *request;
> +
> + request = rcu_dereference(b->first_signal);
> + if (request)
> + request = i915_gem_request_get_rcu(request);
> +
> + barrier();
> +
> + if (!request || request == rcu_access_pointer(b->first_signal))
> + return rcu_pointer_handoff(request);
> +
> + i915_gem_request_put(request);
> + } while (1);
> +}
> +
> static int intel_breadcrumbs_signaler(void *arg)
> {
> struct intel_engine_cs *engine = arg;
> @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> * a new client.
> */
> rcu_read_lock();
> - request = rcu_dereference(b->first_signal);
> - if (request)
> - request = i915_gem_request_get_rcu(request);
> + request = first_signal(b);
get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.
> rcu_read_unlock();
> if (signal_complete(request)) {
> - local_bh_disable();
> - dma_fence_signal(&request->fence);
> - local_bh_enable(); /* kick start the tasklets */
> -
> - spin_lock_irq(&b->rb_lock);
> -
> - /* Wake up all other completed waiters and select the
> - * next bottom-half for the next user interrupt.
> - */
> - __intel_engine_remove_wait(engine,
> - &request->signaling.wait);
> -
> - /* Find the next oldest signal. Note that as we have
> - * not been holding the lock, another client may
> - * have installed an even older signal than the one
> - * we just completed - so double check we are still
> - * the oldest before picking the next one.
> - */
> - if (request == rcu_access_pointer(b->first_signal)) {
> - struct rb_node *rb =
> - rb_next(&request->signaling.node);
> - rcu_assign_pointer(b->first_signal,
> - rb ? to_signaler(rb) : NULL);
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> + &request->fence.flags)) {
> + local_bh_disable();
> + dma_fence_signal(&request->fence);
> + local_bh_enable(); /* kick start the tasklets */
> }
> - rb_erase(&request->signaling.node, &b->signals);
> - RB_CLEAR_NODE(&request->signaling.node);
> -
> - spin_unlock_irq(&b->rb_lock);
>
> - i915_gem_request_put(request);
> + if (request->signaling.wait.seqno) {
> + spin_lock_irq(&b->rb_lock);
> + __intel_engine_remove_signal(engine, request);
> + spin_unlock_irq(&b->rb_lock);
This is safe against double invocation on a single request (race between
retire and signaler) because of RB_EMPTY_NODE guard in
__intel_engine_remove_wait?
> + }
>
> /* If the engine is saturated we may be continually
> * processing completed requests. This angers the
> @@ -712,19 +740,17 @@ static int intel_breadcrumbs_signaler(void *arg)
> */
> do_schedule = need_resched();
> }
> + i915_gem_request_put(request);
>
> if (unlikely(do_schedule)) {
> if (kthread_should_park())
> kthread_parkme();
>
> - if (unlikely(kthread_should_stop())) {
> - i915_gem_request_put(request);
> + if (unlikely(kthread_should_stop()))
> break;
> - }
>
> schedule();
> }
> - i915_gem_request_put(request);
> } while (1);
> __set_current_state(TASK_RUNNING);
>
> @@ -753,12 +779,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> if (!seqno)
> return;
>
> + spin_lock(&b->rb_lock);
> +
> + GEM_BUG_ON(request->signaling.wait.seqno);
> request->signaling.wait.tsk = b->signaler;
> request->signaling.wait.request = request;
> request->signaling.wait.seqno = seqno;
> - i915_gem_request_get(request);
> -
> - spin_lock(&b->rb_lock);
>
> /* First add ourselves into the list of waiters, but register our
> * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -797,7 +823,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> rcu_assign_pointer(b->first_signal, request);
> } else {
> __intel_engine_remove_wait(engine, &request->signaling.wait);
> - i915_gem_request_put(request);
> + request->signaling.wait.seqno = 0;
> wakeup = false;
> }
>
> @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>
> void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> {
> - struct intel_engine_cs *engine = request->engine;
> - struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> GEM_BUG_ON(!irqs_disabled());
> lockdep_assert_held(&request->lock);
> - GEM_BUG_ON(!request->signaling.wait.seqno);
>
> - spin_lock(&b->rb_lock);
> + if (request->signaling.wait.seqno) {
Do you need some special annotation like READ_ONCE on wait.seqno every
time it is used for decisions like this one?
> + struct intel_engine_cs *engine = request->engine;
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> - if (!RB_EMPTY_NODE(&request->signaling.node)) {
> - if (request == rcu_access_pointer(b->first_signal)) {
> - struct rb_node *rb =
> - rb_next(&request->signaling.node);
> - rcu_assign_pointer(b->first_signal,
> - rb ? to_signaler(rb) : NULL);
> - }
> - rb_erase(&request->signaling.node, &b->signals);
> - RB_CLEAR_NODE(&request->signaling.node);
> - i915_gem_request_put(request);
> + spin_lock(&b->rb_lock);
> + __intel_engine_remove_signal(engine, request);
> + spin_unlock(&b->rb_lock);
> }
> -
> - __intel_engine_remove_wait(engine, &request->signaling.wait);
> -
> - spin_unlock(&b->rb_lock);
> -
> - request->signaling.wait.seqno = 0;
> }
>
> int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
>
Not sure yet, because I dread adding more complex RCU rules. I still
can't understand how submission can overtake interrupt processing and
signaling. I would have guessed submission path is much more CPU heavy
and also lower priority.
At minimum I need another pass over it. But anyway, just so you know I
started looking at it.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-31 17:18 ` Tvrtko Ursulin
@ 2018-01-31 17:30 ` Chris Wilson
2018-01-31 20:40 ` Chris Wilson
2018-02-03 10:19 ` [PATCH v2] " Chris Wilson
1 sibling, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 17:30 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
>
> On 22/01/2018 15:41, Chris Wilson wrote:
> > +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> > + struct drm_i915_gem_request *request)
> > +{
> > + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > +
> > + lockdep_assert_held(&b->rb_lock);
> > +
> > + /*
> > + * Wake up all other completed waiters and select the
> > + * next bottom-half for the next user interrupt.
> > + */
> > + __intel_engine_remove_wait(engine, &request->signaling.wait);
> > +
> > + /*
> > + * Find the next oldest signal. Note that as we have
> > + * not been holding the lock, another client may
>
> Which lock, request-lock? b->first_signal seems to always be updated
> under the b->rb_lock, so I am not sure of the
> request->signaling.wait.seqno check.
>
> wait.seqno is also updated without the request->lock below, so maybe you
> were talking about the rb_lock after all?
What it means here is that since we took b->first_signal without
holding the rb_lock, by the time we get to here (and have taken the
rb_lock) b->first_signal may have changed and indeed someone else may
have already retired the request and canceled the signaling. Or inserted
a new signal into the tree. (iirc, this comment is unmodified.)
> How does wait.seqno becomes 0 outside the rb_lock? Is it due RCU
> business this patch adds? Like it is retired before the signaler runs?
Not outside of the rb_lock, just before we notice.
> > + * have installed an even older signal than the one
> > + * we just completed - so double check we are still
> > + * the oldest before picking the next one.
> > + */
> > + if (request->signaling.wait.seqno) {
> > + if (request == rcu_access_pointer(b->first_signal)) {
> > + struct rb_node *rb = rb_next(&request->signaling.node);
> > + rcu_assign_pointer(b->first_signal,
> > + rb ? to_signaler(rb) : NULL);
> > + }
> > +
> > + rb_erase(&request->signaling.node, &b->signals);
> > + request->signaling.wait.seqno = 0;
> > + }
> > +}
> > +
> > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > +{
> > + /*
> > + * See the big warnings for i915_gem_active_get_rcu() and similarly
> > + * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > + * here with defeating CPU/compiler speculation and enforcing
> > + * the required memory barriers.
> > + */
> > + do {
> > + struct drm_i915_gem_request *request;
> > +
> > + request = rcu_dereference(b->first_signal);
> > + if (request)
> > + request = i915_gem_request_get_rcu(request);
> > +
> > + barrier();
> > +
> > + if (!request || request == rcu_access_pointer(b->first_signal))
> > + return rcu_pointer_handoff(request);
> > +
> > + i915_gem_request_put(request);
> > + } while (1);
> > +}
> > +
> > static int intel_breadcrumbs_signaler(void *arg)
> > {
> > struct intel_engine_cs *engine = arg;
> > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > * a new client.
> > */
> > rcu_read_lock();
> > - request = rcu_dereference(b->first_signal);
> > - if (request)
> > - request = i915_gem_request_get_rcu(request);
> > + request = first_signal(b);
>
> get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.
Sold.
> > rcu_read_unlock();
> > if (signal_complete(request)) {
> > - local_bh_disable();
> > - dma_fence_signal(&request->fence);
> > - local_bh_enable(); /* kick start the tasklets */
> > -
> > - spin_lock_irq(&b->rb_lock);
> > -
> > - /* Wake up all other completed waiters and select the
> > - * next bottom-half for the next user interrupt.
> > - */
> > - __intel_engine_remove_wait(engine,
> > - &request->signaling.wait);
> > -
> > - /* Find the next oldest signal. Note that as we have
> > - * not been holding the lock, another client may
> > - * have installed an even older signal than the one
> > - * we just completed - so double check we are still
> > - * the oldest before picking the next one.
> > - */
> > - if (request == rcu_access_pointer(b->first_signal)) {
> > - struct rb_node *rb =
> > - rb_next(&request->signaling.node);
> > - rcu_assign_pointer(b->first_signal,
> > - rb ? to_signaler(rb) : NULL);
> > + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > + &request->fence.flags)) {
> > + local_bh_disable();
> > + dma_fence_signal(&request->fence);
> > + local_bh_enable(); /* kick start the tasklets */
> > }
> > - rb_erase(&request->signaling.node, &b->signals);
> > - RB_CLEAR_NODE(&request->signaling.node);
> > -
> > - spin_unlock_irq(&b->rb_lock);
> >
> > - i915_gem_request_put(request);
> > + if (request->signaling.wait.seqno) {
> > + spin_lock_irq(&b->rb_lock);
> > + __intel_engine_remove_signal(engine, request);
> > + spin_unlock_irq(&b->rb_lock);
>
> This is safe against double invocation on a single request (race between
> retire and signaler) because of RB_EMPTY_NODE guard in
> __intel_engine_remove_wait?
Yes, and wait.seqno==0 guard for the signaler tree.
> > @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >
> > void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> > {
> > - struct intel_engine_cs *engine = request->engine;
> > - struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > -
> > GEM_BUG_ON(!irqs_disabled());
> > lockdep_assert_held(&request->lock);
> > - GEM_BUG_ON(!request->signaling.wait.seqno);
> >
> > - spin_lock(&b->rb_lock);
> > + if (request->signaling.wait.seqno) {
>
> Do you need some special annotation like READ_ONCE on wait.seqno every
> time it is used for decisions like this one?
For humans, yes ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-31 17:30 ` Chris Wilson
@ 2018-01-31 20:40 ` Chris Wilson
2018-01-31 20:43 ` Chris Wilson
0 siblings, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 20:40 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Chris Wilson (2018-01-31 17:30:46)
> Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
> >
> > On 22/01/2018 15:41, Chris Wilson wrote:
> > > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > > +{
> > > + /*
> > > + * See the big warnings for i915_gem_active_get_rcu() and similarly
> > > + * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > > + * here with defeating CPU/compiler speculation and enforcing
> > > + * the required memory barriers.
> > > + */
> > > + do {
> > > + struct drm_i915_gem_request *request;
> > > +
> > > + request = rcu_dereference(b->first_signal);
> > > + if (request)
> > > + request = i915_gem_request_get_rcu(request);
> > > +
> > > + barrier();
> > > +
> > > + if (!request || request == rcu_access_pointer(b->first_signal))
> > > + return rcu_pointer_handoff(request);
> > > +
> > > + i915_gem_request_put(request);
> > > + } while (1);
> > > +}
> > > +
> > > static int intel_breadcrumbs_signaler(void *arg)
> > > {
> > > struct intel_engine_cs *engine = arg;
> > > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > > * a new client.
> > > */
> > > rcu_read_lock();
> > > - request = rcu_dereference(b->first_signal);
> > > - if (request)
> > > - request = i915_gem_request_get_rcu(request);
> > > + request = first_signal(b);
> >
> > get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.
>
> Sold.
Whilst you are looking at this, I should just say that first_signal() is
what we should have been doing all this time; it's really a very obscure
bug fix. And fwiw, the s/rbtree/list/ patch eliminates it.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-31 20:40 ` Chris Wilson
@ 2018-01-31 20:43 ` Chris Wilson
0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-01-31 20:43 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Chris Wilson (2018-01-31 20:40:03)
> Quoting Chris Wilson (2018-01-31 17:30:46)
> > Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
> > >
> > > On 22/01/2018 15:41, Chris Wilson wrote:
> > > > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > > > +{
> > > > + /*
> > > > + * See the big warnings for i915_gem_active_get_rcu() and similarly
> > > > + * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > > > + * here with defeating CPU/compiler speculation and enforcing
> > > > + * the required memory barriers.
> > > > + */
> > > > + do {
> > > > + struct drm_i915_gem_request *request;
> > > > +
> > > > + request = rcu_dereference(b->first_signal);
> > > > + if (request)
> > > > + request = i915_gem_request_get_rcu(request);
> > > > +
> > > > + barrier();
> > > > +
> > > > + if (!request || request == rcu_access_pointer(b->first_signal))
> > > > + return rcu_pointer_handoff(request);
> > > > +
> > > > + i915_gem_request_put(request);
> > > > + } while (1);
> > > > +}
> > > > +
> > > > static int intel_breadcrumbs_signaler(void *arg)
> > > > {
> > > > struct intel_engine_cs *engine = arg;
> > > > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > > > * a new client.
> > > > */
> > > > rcu_read_lock();
> > > > - request = rcu_dereference(b->first_signal);
> > > > - if (request)
> > > > - request = i915_gem_request_get_rcu(request);
> > > > + request = first_signal(b);
> > >
> > > get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.
> >
> > Sold.
>
> Whilst you are looking at this, I should just say that first_signal() is
> what we should have been doing all this time; it's really a very obscure
> bug fix. And fwiw, the s/rbtree/list/ patch eliminates it.
Hmm, actually no, no underlying bug here as the reference was previously
carried by the signaler, and with its removal do we need to do the
double dance. (It's a bad sign when I can remember having that exact
conversation with myself; and forgot until too late.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-01-31 17:18 ` Tvrtko Ursulin
2018-01-31 17:30 ` Chris Wilson
@ 2018-02-03 10:19 ` Chris Wilson
2018-02-05 13:23 ` Tvrtko Ursulin
1 sibling, 1 reply; 21+ messages in thread
From: Chris Wilson @ 2018-02-03 10:19 UTC (permalink / raw)
To: intel-gfx
If we remember to cancel the signaler on a request when retiring it
(after we know that the request has been signaled), we do not need to
carry an additional request in the signaler itself. This prevents an
issue whereby the signaler threads may be delayed and hold on to
thousands of request references, causing severe memory fragmentation and
premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
and frequent use of inter-engine fences).
v2: Rename first_signal(), document reads outside of locks.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 6 +-
drivers/gpu/drm/i915/intel_breadcrumbs.c | 150 +++++++++++++++++--------------
2 files changed, 86 insertions(+), 70 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ff57211ee06..dc7cc2c55bc7 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -555,7 +555,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
spin_lock_irq(&request->lock);
if (request->waitboost)
atomic_dec(&request->i915->gt_pm.rps.num_waiters);
- dma_fence_signal_locked(&request->fence);
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
+ dma_fence_signal_locked(&request->fence);
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+ intel_engine_cancel_signaling(request);
spin_unlock_irq(&request->lock);
i915_priotree_fini(request->i915, &request->priotree);
@@ -864,6 +867,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
/* No zalloc, must clear what we need by hand */
req->global_seqno = 0;
+ req->signaling.wait.seqno = 0;
req->file_priv = NULL;
req->batch = NULL;
req->capture_list = NULL;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 62b2a20bc24e..efbc627a2a25 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -224,7 +224,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
struct intel_wait *wait, *n;
if (!b->irq_armed)
- goto wakeup_signaler;
+ return;
/*
* We only disarm the irq when we are idle (all requests completed),
@@ -249,14 +249,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
b->waiters = RB_ROOT;
spin_unlock_irq(&b->rb_lock);
-
- /*
- * The signaling thread may be asleep holding a reference to a request,
- * that had its signaling cancelled prior to being preempted. We need
- * to kick the signaler, just in case, to release any such reference.
- */
-wakeup_signaler:
- wake_up_process(b->signaler);
}
static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -633,6 +625,63 @@ static void signaler_set_rtpriority(void)
sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
}
+static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ lockdep_assert_held(&b->rb_lock);
+
+ /*
+ * Wake up all other completed waiters and select the
+ * next bottom-half for the next user interrupt.
+ */
+ __intel_engine_remove_wait(engine, &request->signaling.wait);
+
+ /*
+ * Find the next oldest signal. Note that as we have
+ * not been holding the lock, another client may
+ * have installed an even older signal than the one
+ * we just completed - so double check we are still
+ * the oldest before picking the next one.
+ */
+ if (request->signaling.wait.seqno) {
+ if (request == rcu_access_pointer(b->first_signal)) {
+ struct rb_node *rb = rb_next(&request->signaling.node);
+ rcu_assign_pointer(b->first_signal,
+ rb ? to_signaler(rb) : NULL);
+ }
+
+ rb_erase(&request->signaling.node, &b->signals);
+ request->signaling.wait.seqno = 0;
+ }
+}
+
+static struct drm_i915_gem_request *
+get_first_signal_rcu(struct intel_breadcrumbs *b)
+{
+ /*
+ * See the big warnings for i915_gem_active_get_rcu() and similarly
+ * for dma_fence_get_rcu_safe() that explain the intricacies involved
+ * here with defeating CPU/compiler speculation and enforcing
+ * the required memory barriers.
+ */
+ do {
+ struct drm_i915_gem_request *request;
+
+ request = rcu_dereference(b->first_signal);
+ if (request)
+ request = i915_gem_request_get_rcu(request);
+
+ barrier();
+
+ if (!request || request == rcu_access_pointer(b->first_signal))
+ return rcu_pointer_handoff(request);
+
+ i915_gem_request_put(request);
+ } while (1);
+}
+
static int intel_breadcrumbs_signaler(void *arg)
{
struct intel_engine_cs *engine = arg;
@@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
* a new client.
*/
rcu_read_lock();
- request = rcu_dereference(b->first_signal);
- if (request)
- request = i915_gem_request_get_rcu(request);
+ request = get_first_signal_rcu(b);
rcu_read_unlock();
if (signal_complete(request)) {
- local_bh_disable();
- dma_fence_signal(&request->fence);
- local_bh_enable(); /* kick start the tasklets */
-
- spin_lock_irq(&b->rb_lock);
-
- /* Wake up all other completed waiters and select the
- * next bottom-half for the next user interrupt.
- */
- __intel_engine_remove_wait(engine,
- &request->signaling.wait);
-
- /* Find the next oldest signal. Note that as we have
- * not been holding the lock, another client may
- * have installed an even older signal than the one
- * we just completed - so double check we are still
- * the oldest before picking the next one.
- */
- if (request == rcu_access_pointer(b->first_signal)) {
- struct rb_node *rb =
- rb_next(&request->signaling.node);
- rcu_assign_pointer(b->first_signal,
- rb ? to_signaler(rb) : NULL);
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &request->fence.flags)) {
+ local_bh_disable();
+ dma_fence_signal(&request->fence);
+ local_bh_enable(); /* kick start the tasklets */
}
- rb_erase(&request->signaling.node, &b->signals);
- RB_CLEAR_NODE(&request->signaling.node);
-
- spin_unlock_irq(&b->rb_lock);
- i915_gem_request_put(request);
+ if (READ_ONCE(request->signaling.wait.seqno)) {
+ spin_lock_irq(&b->rb_lock);
+ __intel_engine_remove_signal(engine, request);
+ spin_unlock_irq(&b->rb_lock);
+ }
/* If the engine is saturated we may be continually
* processing completed requests. This angers the
@@ -701,19 +730,17 @@ static int intel_breadcrumbs_signaler(void *arg)
*/
do_schedule = need_resched();
}
+ i915_gem_request_put(request);
if (unlikely(do_schedule)) {
if (kthread_should_park())
kthread_parkme();
- if (unlikely(kthread_should_stop())) {
- i915_gem_request_put(request);
+ if (unlikely(kthread_should_stop()))
break;
- }
schedule();
}
- i915_gem_request_put(request);
} while (1);
__set_current_state(TASK_RUNNING);
@@ -742,12 +769,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
if (!seqno)
return;
+ spin_lock(&b->rb_lock);
+
+ GEM_BUG_ON(request->signaling.wait.seqno);
request->signaling.wait.tsk = b->signaler;
request->signaling.wait.request = request;
request->signaling.wait.seqno = seqno;
- i915_gem_request_get(request);
-
- spin_lock(&b->rb_lock);
/* First add ourselves into the list of waiters, but register our
* bottom-half as the signaller thread. As per usual, only the oldest
@@ -786,7 +813,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
rcu_assign_pointer(b->first_signal, request);
} else {
__intel_engine_remove_wait(engine, &request->signaling.wait);
- i915_gem_request_put(request);
+ request->signaling.wait.seqno = 0;
wakeup = false;
}
@@ -798,32 +825,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
{
- struct intel_engine_cs *engine = request->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock);
- GEM_BUG_ON(!request->signaling.wait.seqno);
- spin_lock(&b->rb_lock);
+ if (READ_ONCE(request->signaling.wait.seqno)) {
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
- if (!RB_EMPTY_NODE(&request->signaling.node)) {
- if (request == rcu_access_pointer(b->first_signal)) {
- struct rb_node *rb =
- rb_next(&request->signaling.node);
- rcu_assign_pointer(b->first_signal,
- rb ? to_signaler(rb) : NULL);
- }
- rb_erase(&request->signaling.node, &b->signals);
- RB_CLEAR_NODE(&request->signaling.node);
- i915_gem_request_put(request);
+ spin_lock(&b->rb_lock);
+ __intel_engine_remove_signal(engine, request);
+ spin_unlock(&b->rb_lock);
}
-
- __intel_engine_remove_wait(engine, &request->signaling.wait);
-
- spin_unlock(&b->rb_lock);
-
- request->signaling.wait.seqno = 0;
}
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 21+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
` (5 preceding siblings ...)
2018-01-31 17:18 ` Tvrtko Ursulin
@ 2018-02-03 10:40 ` Patchwork
2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
7 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2018-02-03 10:40 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
URL : https://patchwork.freedesktop.org/series/36908/
State : success
== Summary ==
Series 36908v2 drm/i915/breadcrumbs: Drop request reference for the signaler thread
https://patchwork.freedesktop.org/api/1.0/series/36908/revisions/2/mbox/
Test gem_mmap_gtt:
Subgroup basic-small-bo-tiledx:
pass -> FAIL (fi-gdg-551) fdo#102575
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
pass -> INCOMPLETE (fi-snb-2520m) fdo#103713
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:421s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:425s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:370s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:481s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:280s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:476s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:487s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:465s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:463s
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:568s
fi-elk-e7500 total:288 pass:229 dwarn:0 dfail:0 fail:0 skip:59 time:410s
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:277s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:513s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:386s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:395s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:408s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:468s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:410s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:460s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:495s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:452s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:497s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:572s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:428s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:506s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:527s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:483s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:473s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:414s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:429s
fi-snb-2520m total:245 pass:211 dwarn:0 dfail:0 fail:0 skip:33
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:392s
Blacklisted hosts:
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:470s
2e76a2952923eba64c4f9baf461613bc42ee997a drm-tip: 2018y-02m-02d-20h-33m-12s UTC integration manifest
60b9a2f8d1e9 drm/i915/breadcrumbs: Drop request reference for the signaler thread
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7878/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* ✗ Fi.CI.IGT: failure for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
` (6 preceding siblings ...)
2018-02-03 10:40 ` ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2) Patchwork
@ 2018-02-03 11:32 ` Patchwork
2018-02-05 16:09 ` Chris Wilson
7 siblings, 1 reply; 21+ messages in thread
From: Patchwork @ 2018-02-03 11:32 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
URL : https://patchwork.freedesktop.org/series/36908/
State : failure
== Summary ==
Test perf_pmu:
Subgroup busy-double-start-vcs0:
pass -> INCOMPLETE (shard-apl)
Test kms_cursor_legacy:
Subgroup flip-vs-cursor-atomic:
fail -> PASS (shard-hsw) fdo#102670 +1
Test kms_flip:
Subgroup flip-vs-absolute-wf_vblank-interruptible:
pass -> FAIL (shard-hsw) fdo#100368
Subgroup 2x-flip-vs-expired-vblank-interruptible:
fail -> PASS (shard-hsw) fdo#102887
Subgroup 2x-plain-flip-ts-check:
fail -> PASS (shard-hsw)
Test kms_rotation_crc:
Subgroup bad-tiling:
pass -> FAIL (shard-apl) fdo#103925
Test gem_eio:
Subgroup in-flight-contexts:
dmesg-warn -> PASS (shard-snb) fdo#104058
Test kms_draw_crc:
Subgroup fill-fb:
pass -> SKIP (shard-snb)
fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058
shard-apl total:2795 pass:1724 dwarn:1 dfail:0 fail:20 skip:1049 time:12065s
shard-hsw total:2836 pass:1732 dwarn:1 dfail:0 fail:12 skip:1090 time:11572s
shard-snb total:2836 pass:1327 dwarn:1 dfail:0 fail:10 skip:1498 time:6385s
Blacklisted hosts:
shard-kbl total:2836 pass:1870 dwarn:3 dfail:0 fail:22 skip:941 time:9372s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7878/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-02-03 10:19 ` [PATCH v2] " Chris Wilson
@ 2018-02-05 13:23 ` Tvrtko Ursulin
2018-02-05 13:36 ` Chris Wilson
0 siblings, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-02-05 13:23 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 03/02/2018 10:19, Chris Wilson wrote:
> If we remember to cancel the signaler on a request when retiring it
> (after we know that the request has been signaled), we do not need to
> carry an additional request in the signaler itself. This prevents an
> issue whereby the signaler threads may be delayed and hold on to
> thousands of request references, causing severe memory fragmentation and
> premature oom (most noticeable on 32b snb due to the limited GFP_KERNEL
> and frequent use of inter-engine fences).
>
> v2: Rename first_signal(), document reads outside of locks.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 6 +-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 150 +++++++++++++++++--------------
> 2 files changed, 86 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 5ff57211ee06..dc7cc2c55bc7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -555,7 +555,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> spin_lock_irq(&request->lock);
> if (request->waitboost)
> atomic_dec(&request->i915->gt_pm.rps.num_waiters);
> - dma_fence_signal_locked(&request->fence);
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
> + dma_fence_signal_locked(&request->fence);
> + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> + intel_engine_cancel_signaling(request);
> spin_unlock_irq(&request->lock);
>
> i915_priotree_fini(request->i915, &request->priotree);
> @@ -864,6 +867,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>
> /* No zalloc, must clear what we need by hand */
> req->global_seqno = 0;
> + req->signaling.wait.seqno = 0;
> req->file_priv = NULL;
> req->batch = NULL;
> req->capture_list = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 62b2a20bc24e..efbc627a2a25 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -224,7 +224,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> struct intel_wait *wait, *n;
>
> if (!b->irq_armed)
> - goto wakeup_signaler;
> + return;
>
> /*
> * We only disarm the irq when we are idle (all requests completed),
> @@ -249,14 +249,6 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> b->waiters = RB_ROOT;
>
> spin_unlock_irq(&b->rb_lock);
> -
> - /*
> - * The signaling thread may be asleep holding a reference to a request,
> - * that had its signaling cancelled prior to being preempted. We need
> - * to kick the signaler, just in case, to release any such reference.
> - */
> -wakeup_signaler:
> - wake_up_process(b->signaler);
> }
>
> static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -633,6 +625,63 @@ static void signaler_set_rtpriority(void)
> sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
> }
>
> +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> + struct drm_i915_gem_request *request)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + lockdep_assert_held(&b->rb_lock);
> +
> + /*
> + * Wake up all other completed waiters and select the
> + * next bottom-half for the next user interrupt.
> + */
> + __intel_engine_remove_wait(engine, &request->signaling.wait);
> +
> + /*
> + * Find the next oldest signal. Note that as we have
> + * not been holding the lock, another client may
> + * have installed an even older signal than the one
> + * we just completed - so double check we are still
> + * the oldest before picking the next one.
> + */
> + if (request->signaling.wait.seqno) {
> + if (request == rcu_access_pointer(b->first_signal)) {
> + struct rb_node *rb = rb_next(&request->signaling.node);
> + rcu_assign_pointer(b->first_signal,
> + rb ? to_signaler(rb) : NULL);
> + }
> +
> + rb_erase(&request->signaling.node, &b->signals);
> + request->signaling.wait.seqno = 0;
> + }
> +}
> +
> +static struct drm_i915_gem_request *
> +get_first_signal_rcu(struct intel_breadcrumbs *b)
> +{
> + /*
> + * See the big warnings for i915_gem_active_get_rcu() and similarly
> + * for dma_fence_get_rcu_safe() that explain the intricacies involved
> + * here with defeating CPU/compiler speculation and enforcing
> + * the required memory barriers.
> + */
> + do {
> + struct drm_i915_gem_request *request;
> +
> + request = rcu_dereference(b->first_signal);
> + if (request)
> + request = i915_gem_request_get_rcu(request);
> +
> + barrier();
> +
> + if (!request || request == rcu_access_pointer(b->first_signal))
> + return rcu_pointer_handoff(request);
> +
> + i915_gem_request_put(request);
> + } while (1);
> +}
> +
> static int intel_breadcrumbs_signaler(void *arg)
> {
> struct intel_engine_cs *engine = arg;
> @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> * a new client.
> */
> rcu_read_lock();
> - request = rcu_dereference(b->first_signal);
> - if (request)
> - request = i915_gem_request_get_rcu(request);
> + request = get_first_signal_rcu(b);
> rcu_read_unlock();
> if (signal_complete(request)) {
> - local_bh_disable();
> - dma_fence_signal(&request->fence);
> - local_bh_enable(); /* kick start the tasklets */
> -
> - spin_lock_irq(&b->rb_lock);
> -
> - /* Wake up all other completed waiters and select the
> - * next bottom-half for the next user interrupt.
> - */
> - __intel_engine_remove_wait(engine,
> - &request->signaling.wait);
> -
> - /* Find the next oldest signal. Note that as we have
> - * not been holding the lock, another client may
> - * have installed an even older signal than the one
> - * we just completed - so double check we are still
> - * the oldest before picking the next one.
> - */
> - if (request == rcu_access_pointer(b->first_signal)) {
> - struct rb_node *rb =
> - rb_next(&request->signaling.node);
> - rcu_assign_pointer(b->first_signal,
> - rb ? to_signaler(rb) : NULL);
> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> + &request->fence.flags)) {
> + local_bh_disable();
> + dma_fence_signal(&request->fence);
> + local_bh_enable(); /* kick start the tasklets */
> }
> - rb_erase(&request->signaling.node, &b->signals);
> - RB_CLEAR_NODE(&request->signaling.node);
> -
> - spin_unlock_irq(&b->rb_lock);
>
> - i915_gem_request_put(request);
> + if (READ_ONCE(request->signaling.wait.seqno)) {
> + spin_lock_irq(&b->rb_lock);
> + __intel_engine_remove_signal(engine, request);
> + spin_unlock_irq(&b->rb_lock);
> + }
How would you view taking the request->lock over this block in the
signaler and then being able to call simply
intel_engine_cancel_signaling - ending up with very similar code as in
i915_gem_request_retire?
Only difference would be the tasklet kicking, but maybe still it would
be possible to consolidate the two in one helper.
__i915_gem_request_retire_signaling(req, bool kick_tasklets)
{
if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
dma_fence_signal_locked(...);
if (kick_tasklets) {
local_bh_disable();
local_bh_enable();
}
}
if (DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT)
intel_engine_cancel_signaling(...);
}
i915_gem_request_retire_signaling(req)
{
spin_lock_irq(req->lock);
__i915_gem_request_retire_signaling(req, true);
spin_unlock_irq(req->lock);
}
And call the former from request retire, and later from signaler?
Regards,
Tvrtko
>
> /* If the engine is saturated we may be continually
> * processing completed requests. This angers the
> @@ -701,19 +730,17 @@ static int intel_breadcrumbs_signaler(void *arg)
> */
> do_schedule = need_resched();
> }
> + i915_gem_request_put(request);
>
> if (unlikely(do_schedule)) {
> if (kthread_should_park())
> kthread_parkme();
>
> - if (unlikely(kthread_should_stop())) {
> - i915_gem_request_put(request);
> + if (unlikely(kthread_should_stop()))
> break;
> - }
>
> schedule();
> }
> - i915_gem_request_put(request);
> } while (1);
> __set_current_state(TASK_RUNNING);
>
> @@ -742,12 +769,12 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> if (!seqno)
> return;
>
> + spin_lock(&b->rb_lock);
> +
> + GEM_BUG_ON(request->signaling.wait.seqno);
> request->signaling.wait.tsk = b->signaler;
> request->signaling.wait.request = request;
> request->signaling.wait.seqno = seqno;
> - i915_gem_request_get(request);
> -
> - spin_lock(&b->rb_lock);
>
> /* First add ourselves into the list of waiters, but register our
> * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -786,7 +813,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> rcu_assign_pointer(b->first_signal, request);
> } else {
> __intel_engine_remove_wait(engine, &request->signaling.wait);
> - i915_gem_request_put(request);
> + request->signaling.wait.seqno = 0;
> wakeup = false;
> }
>
> @@ -798,32 +825,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
>
> void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> {
> - struct intel_engine_cs *engine = request->engine;
> - struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> GEM_BUG_ON(!irqs_disabled());
> lockdep_assert_held(&request->lock);
> - GEM_BUG_ON(!request->signaling.wait.seqno);
>
> - spin_lock(&b->rb_lock);
> + if (READ_ONCE(request->signaling.wait.seqno)) {
> + struct intel_engine_cs *engine = request->engine;
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> - if (!RB_EMPTY_NODE(&request->signaling.node)) {
> - if (request == rcu_access_pointer(b->first_signal)) {
> - struct rb_node *rb =
> - rb_next(&request->signaling.node);
> - rcu_assign_pointer(b->first_signal,
> - rb ? to_signaler(rb) : NULL);
> - }
> - rb_erase(&request->signaling.node, &b->signals);
> - RB_CLEAR_NODE(&request->signaling.node);
> - i915_gem_request_put(request);
> + spin_lock(&b->rb_lock);
> + __intel_engine_remove_signal(engine, request);
> + spin_unlock(&b->rb_lock);
> }
> -
> - __intel_engine_remove_wait(engine, &request->signaling.wait);
> -
> - spin_unlock(&b->rb_lock);
> -
> - request->signaling.wait.seqno = 0;
> }
>
> int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-02-05 13:23 ` Tvrtko Ursulin
@ 2018-02-05 13:36 ` Chris Wilson
2018-02-05 13:39 ` Chris Wilson
2018-02-05 13:45 ` Tvrtko Ursulin
0 siblings, 2 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 13:36 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
>
> On 03/02/2018 10:19, Chris Wilson wrote:
> > @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > * a new client.
> > */
> > rcu_read_lock();
> > - request = rcu_dereference(b->first_signal);
> > - if (request)
> > - request = i915_gem_request_get_rcu(request);
> > + request = get_first_signal_rcu(b);
> > rcu_read_unlock();
> > if (signal_complete(request)) {
> > - local_bh_disable();
> > - dma_fence_signal(&request->fence);
> > - local_bh_enable(); /* kick start the tasklets */
> > -
> > - spin_lock_irq(&b->rb_lock);
> > -
> > - /* Wake up all other completed waiters and select the
> > - * next bottom-half for the next user interrupt.
> > - */
> > - __intel_engine_remove_wait(engine,
> > - &request->signaling.wait);
> > -
> > - /* Find the next oldest signal. Note that as we have
> > - * not been holding the lock, another client may
> > - * have installed an even older signal than the one
> > - * we just completed - so double check we are still
> > - * the oldest before picking the next one.
> > - */
> > - if (request == rcu_access_pointer(b->first_signal)) {
> > - struct rb_node *rb =
> > - rb_next(&request->signaling.node);
> > - rcu_assign_pointer(b->first_signal,
> > - rb ? to_signaler(rb) : NULL);
> > + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > + &request->fence.flags)) {
> > + local_bh_disable();
> > + dma_fence_signal(&request->fence);
> > + local_bh_enable(); /* kick start the tasklets */
> > }
> > - rb_erase(&request->signaling.node, &b->signals);
> > - RB_CLEAR_NODE(&request->signaling.node);
> > -
> > - spin_unlock_irq(&b->rb_lock);
> >
> > - i915_gem_request_put(request);
> > + if (READ_ONCE(request->signaling.wait.seqno)) {
> > + spin_lock_irq(&b->rb_lock);
> > + __intel_engine_remove_signal(engine, request);
> > + spin_unlock_irq(&b->rb_lock);
> > + }
>
> How would you view taking the request->lock over this block in the
> signaler and then being able to call simply
> intel_engine_cancel_signaling - ending up with very similar code as in
> i915_gem_request_retire?
I am not keen on the conflation here (maybe it's just a hatred of bool
parameters). But at first glance it's just the commonality of
remove_signal, which is already a common routine?
> Only difference would be the tasklet kicking, but maybe still it would
> be possible to consolidate the two in one helper.
>
> __i915_gem_request_retire_signaling(req, bool kick_tasklets)
> {
> if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
> dma_fence_signal_locked(...);
> if (kick_tasklets) {
> local_bh_disable();
> local_bh_enable();
> }
We can't kick the tasklet inside a spinlock. Especially not a lockclass
as nested as request.lock :(
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-02-05 13:36 ` Chris Wilson
@ 2018-02-05 13:39 ` Chris Wilson
2018-02-05 13:45 ` Tvrtko Ursulin
1 sibling, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 13:39 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Chris Wilson (2018-02-05 13:36:36)
> Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
> >
> > On 03/02/2018 10:19, Chris Wilson wrote:
> > > @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> > > * a new client.
> > > */
> > > rcu_read_lock();
> > > - request = rcu_dereference(b->first_signal);
> > > - if (request)
> > > - request = i915_gem_request_get_rcu(request);
> > > + request = get_first_signal_rcu(b);
> > > rcu_read_unlock();
> > > if (signal_complete(request)) {
> > > - local_bh_disable();
> > > - dma_fence_signal(&request->fence);
> > > - local_bh_enable(); /* kick start the tasklets */
> > > -
> > > - spin_lock_irq(&b->rb_lock);
> > > -
> > > - /* Wake up all other completed waiters and select the
> > > - * next bottom-half for the next user interrupt.
> > > - */
> > > - __intel_engine_remove_wait(engine,
> > > - &request->signaling.wait);
> > > -
> > > - /* Find the next oldest signal. Note that as we have
> > > - * not been holding the lock, another client may
> > > - * have installed an even older signal than the one
> > > - * we just completed - so double check we are still
> > > - * the oldest before picking the next one.
> > > - */
> > > - if (request == rcu_access_pointer(b->first_signal)) {
> > > - struct rb_node *rb =
> > > - rb_next(&request->signaling.node);
> > > - rcu_assign_pointer(b->first_signal,
> > > - rb ? to_signaler(rb) : NULL);
> > > + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > > + &request->fence.flags)) {
> > > + local_bh_disable();
> > > + dma_fence_signal(&request->fence);
> > > + local_bh_enable(); /* kick start the tasklets */
> > > }
> > > - rb_erase(&request->signaling.node, &b->signals);
> > > - RB_CLEAR_NODE(&request->signaling.node);
> > > -
> > > - spin_unlock_irq(&b->rb_lock);
> > >
> > > - i915_gem_request_put(request);
> > > + if (READ_ONCE(request->signaling.wait.seqno)) {
> > > + spin_lock_irq(&b->rb_lock);
> > > + __intel_engine_remove_signal(engine, request);
> > > + spin_unlock_irq(&b->rb_lock);
> > > + }
> >
> > How would you view taking the request->lock over this block in the
> > signaler and then being able to call simply
> > intel_engine_cancel_signaling - ending up with very similar code as in
> > i915_gem_request_retire?
>
> I am not keen on the conflation here (maybe it's just a hatred of bool
> parameters). But at first glance it's just the commonality of
> remove_signal, which is already a common routine?
>
> > Only difference would be the tasklet kicking, but maybe still it would
> > be possible to consolidate the two in one helper.
> >
> > __i915_gem_request_retire_signaling(req, bool kick_tasklets)
> > {
> > if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
> > dma_fence_signal_locked(...);
> > if (kick_tasklets) {
> > local_bh_disable();
> > local_bh_enable();
> > }
>
> We can't kick the tasklet inside a spinlock. Especially not a lockclass
> as nested as request.lock :(
Also bear in mind this is just an intermediate step in my plans. :)
static int intel_breadcrumbs_signaler(void *arg)
{
struct intel_engine_cs *engine = arg;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct drm_i915_gem_request *rq, *n;
/* Install ourselves with high priority to reduce signalling latency */
signaler_set_rtpriority();
do {
bool do_schedule = true;
LIST_HEAD(list);
u32 seqno;
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&b->signals))
goto sleep;
/*
* We are either woken up by the interrupt bottom-half,
* or by a client adding a new signaller. In both cases,
* the GPU seqno may have advanced beyond our oldest signal.
* If it has, propagate the signal, remove the waiter and
* check again with the next oldest signal. Otherwise we
* need to wait for a new interrupt from the GPU or for
* a new client.
*/
seqno = intel_engine_get_seqno(engine);
spin_lock_irq(&b->rb_lock);
list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
u32 this = rq->signaling.wait.seqno;
GEM_BUG_ON(!rq->signaling.wait.seqno);
if (!i915_seqno_passed(seqno, this))
break;
if (this == i915_gem_request_global_seqno(rq)) {
__intel_engine_remove_wait(engine,
&rq->signaling.wait);
rq->signaling.wait.seqno = 0;
__list_del_entry(&rq->signaling.link);
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&rq->fence.flags)) {
list_add_tail(&rq->signaling.link,
&list);
i915_gem_request_get(rq);
}
}
}
spin_unlock_irq(&b->rb_lock);
if (!list_empty(&list)) {
local_bh_disable();
list_for_each_entry_safe(rq, n, &list, signaling.link) {
dma_fence_signal(&rq->fence);
i915_gem_request_put(rq);
}
local_bh_enable(); /* kick start the tasklets */
/*
* If the engine is saturated we may be continually
* processing completed requests. This angers the
* NMI watchdog if we never let anything else
* have access to the CPU. Let's pretend to be nice
* and relinquish the CPU if we burn through the
* entire RT timeslice!
*/
do_schedule = need_resched();
}
if (unlikely(do_schedule)) {
if (current->state & TASK_NORMAL &&
!list_empty(&b->signals) &&
engine->irq_seqno_barrier &&
test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
&engine->irq_posted)) {
engine->irq_seqno_barrier(engine);
intel_engine_wakeup(engine);
}
sleep:
if (kthread_should_park())
kthread_parkme();
if (unlikely(kthread_should_stop()))
break;
schedule();
}
} while (1);
__set_current_state(TASK_RUNNING);
return 0;
}
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-02-05 13:36 ` Chris Wilson
2018-02-05 13:39 ` Chris Wilson
@ 2018-02-05 13:45 ` Tvrtko Ursulin
2018-02-05 16:11 ` Chris Wilson
1 sibling, 1 reply; 21+ messages in thread
From: Tvrtko Ursulin @ 2018-02-05 13:45 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 05/02/2018 13:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
>>
>> On 03/02/2018 10:19, Chris Wilson wrote:
>>> @@ -656,41 +705,21 @@ static int intel_breadcrumbs_signaler(void *arg)
>>> * a new client.
>>> */
>>> rcu_read_lock();
>>> - request = rcu_dereference(b->first_signal);
>>> - if (request)
>>> - request = i915_gem_request_get_rcu(request);
>>> + request = get_first_signal_rcu(b);
>>> rcu_read_unlock();
>>> if (signal_complete(request)) {
>>> - local_bh_disable();
>>> - dma_fence_signal(&request->fence);
>>> - local_bh_enable(); /* kick start the tasklets */
>>> -
>>> - spin_lock_irq(&b->rb_lock);
>>> -
>>> - /* Wake up all other completed waiters and select the
>>> - * next bottom-half for the next user interrupt.
>>> - */
>>> - __intel_engine_remove_wait(engine,
>>> - &request->signaling.wait);
>>> -
>>> - /* Find the next oldest signal. Note that as we have
>>> - * not been holding the lock, another client may
>>> - * have installed an even older signal than the one
>>> - * we just completed - so double check we are still
>>> - * the oldest before picking the next one.
>>> - */
>>> - if (request == rcu_access_pointer(b->first_signal)) {
>>> - struct rb_node *rb =
>>> - rb_next(&request->signaling.node);
>>> - rcu_assign_pointer(b->first_signal,
>>> - rb ? to_signaler(rb) : NULL);
>>> + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>>> + &request->fence.flags)) {
>>> + local_bh_disable();
>>> + dma_fence_signal(&request->fence);
>>> + local_bh_enable(); /* kick start the tasklets */
>>> }
>>> - rb_erase(&request->signaling.node, &b->signals);
>>> - RB_CLEAR_NODE(&request->signaling.node);
>>> -
>>> - spin_unlock_irq(&b->rb_lock);
>>>
>>> - i915_gem_request_put(request);
>>> + if (READ_ONCE(request->signaling.wait.seqno)) {
>>> + spin_lock_irq(&b->rb_lock);
>>> + __intel_engine_remove_signal(engine, request);
>>> + spin_unlock_irq(&b->rb_lock);
>>> + }
>>
>> How would you view taking the request->lock over this block in the
>> signaler and then being able to call simply
>> intel_engine_cancel_signaling - ending up with very similar code as in
>> i915_gem_request_retire?
>
> I am not keen on the conflation here (maybe it's just a hatred of bool
> parameters). But at first glance it's just the commonality of
> remove_signal, which is already a common routine?
>
>> Only difference would be the tasklet kicking, but maybe still it would
>> be possible to consolidate the two in one helper.
>>
>> __i915_gem_request_retire_signaling(req, bool kick_tasklets)
>> {
>> if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
>> dma_fence_signal_locked(...);
>> if (kick_tasklets) {
>> local_bh_disable();
>> local_bh_enable();
>> }
>
> We can't kick the tasklet inside a spinlock. Especially not a lockclass
> as nested as request.lock :(
Yep bool is ugly. So maybe make the helper return status, so the caller
can kick if fence was signaled? Or you would worry about losing a little
bit of latency? That also is not ideal I agree.
Too bad, I would kind of like to consolidate if nothing to be
symmetrical wrt req->lock usage.
Otherwise patch looks safe to me. At least I failed to find any problems.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: ✗ Fi.CI.IGT: failure for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-02-05 16:09 ` Chris Wilson
0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 16:09 UTC (permalink / raw)
To: Patchwork; +Cc: intel-gfx
Quoting Patchwork (2018-02-03 11:32:01)
> == Series Details ==
>
> Series: drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2)
> URL : https://patchwork.freedesktop.org/series/36908/
> State : failure
>
> == Summary ==
>
> Test perf_pmu:
> Subgroup busy-double-start-vcs0:
> pass -> INCOMPLETE (shard-apl)
> Test kms_cursor_legacy:
> Subgroup flip-vs-cursor-atomic:
> fail -> PASS (shard-hsw) fdo#102670 +1
> Test kms_flip:
> Subgroup flip-vs-absolute-wf_vblank-interruptible:
> pass -> FAIL (shard-hsw) fdo#100368
> Subgroup 2x-flip-vs-expired-vblank-interruptible:
> fail -> PASS (shard-hsw) fdo#102887
> Subgroup 2x-plain-flip-ts-check:
> fail -> PASS (shard-hsw)
> Test kms_rotation_crc:
> Subgroup bad-tiling:
> pass -> FAIL (shard-apl) fdo#103925
> Test gem_eio:
> Subgroup in-flight-contexts:
> dmesg-warn -> PASS (shard-snb) fdo#104058
> Test kms_draw_crc:
> Subgroup fill-fb:
> pass -> SKIP (shard-snb)
>
> fdo#102670 https://bugs.freedesktop.org/show_bug.cgi?id=102670
> fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
> fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
> fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
> fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058
>
> shard-apl total:2795 pass:1724 dwarn:1 dfail:0 fail:20 skip:1049 time:12065s
> shard-hsw total:2836 pass:1732 dwarn:1 dfail:0 fail:12 skip:1090 time:11572s
> shard-snb total:2836 pass:1327 dwarn:1 dfail:0 fail:10 skip:1498 time:6385s
> Blacklisted hosts:
> shard-kbl total:2836 pass:1870 dwarn:3 dfail:0 fail:22 skip:941 time:9372s
>
> == Logs ==
>
> For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7878/shards.html
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2] drm/i915/breadcrumbs: Drop request reference for the signaler thread
2018-02-05 13:45 ` Tvrtko Ursulin
@ 2018-02-05 16:11 ` Chris Wilson
0 siblings, 0 replies; 21+ messages in thread
From: Chris Wilson @ 2018-02-05 16:11 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Tvrtko Ursulin (2018-02-05 13:45:16)
>
> On 05/02/2018 13:36, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-02-05 13:23:54)
> >> How would you view taking the request->lock over this block in the
> >> signaler and then being able to call simply
> >> intel_engine_cancel_signaling - ending up with very similar code as in
> >> i915_gem_request_retire?
> >
> > I am not keen on the conflation here (maybe it's just a hatred of bool
> > parameters). But at first glance it's just the commonality of
> > remove_signal, which is already a common routine?
> >
> >> Only difference would be the tasklet kicking, but maybe still it would
> >> be possible to consolidate the two in one helper.
> >>
> >> __i915_gem_request_retire_signaling(req, bool kick_tasklets)
> >> {
> >> if (!DMA_FENCE_FLAG_SIGNALED_BIT) {
> >> dma_fence_signal_locked(...);
> >> if (kick_tasklets) {
> >> local_bh_disable();
> >> local_bh_enable();
> >> }
> >
> > We can't kick the tasklet inside a spinlock. Especially not a lockclass
> > as nested as request.lock :(
>
> Yep bool is ugly. So maybe make the helper return status, so the caller
> can kick if fence was signaled? Or you would worry about losing a little
> bit of latency? That also is not ideal I agree.
>
> Too bad, I would kind of like to consolidate if nothing to be
> symmetrical wrt req->lock usage.
>
> Otherwise patch looks safe to me. At least I failed to find any problems.
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Took the r-b and pushed because I wanted to see the back of this oom
fix. The next steps in this series are to try and lighten the
irq/signaler threads, so suggestions are most appreciated.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2018-02-05 16:11 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-22 15:41 [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread Chris Wilson
2018-01-22 16:07 ` ✗ Fi.CI.BAT: warning for " Patchwork
2018-01-22 19:01 ` ✓ Fi.CI.BAT: success " Patchwork
2018-01-23 0:03 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-01-24 13:09 ` [PATCH] " Tvrtko Ursulin
2018-01-24 14:44 ` Chris Wilson
2018-01-26 9:42 ` Chris Wilson
2018-01-31 10:02 ` Chris Wilson
2018-01-31 17:18 ` Tvrtko Ursulin
2018-01-31 17:30 ` Chris Wilson
2018-01-31 20:40 ` Chris Wilson
2018-01-31 20:43 ` Chris Wilson
2018-02-03 10:19 ` [PATCH v2] " Chris Wilson
2018-02-05 13:23 ` Tvrtko Ursulin
2018-02-05 13:36 ` Chris Wilson
2018-02-05 13:39 ` Chris Wilson
2018-02-05 13:45 ` Tvrtko Ursulin
2018-02-05 16:11 ` Chris Wilson
2018-02-03 10:40 ` ✓ Fi.CI.BAT: success for drm/i915/breadcrumbs: Drop request reference for the signaler thread (rev2) Patchwork
2018-02-03 11:32 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-02-05 16:09 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.