All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
@ 2019-12-04 21:17 Chris Wilson
  2019-12-04 22:24 ` Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Chris Wilson @ 2019-12-04 21:17 UTC (permalink / raw)
  To: intel-gfx

We want the bonded request to have the same scheduler properties as its
master so that it is placed at the same depth in the queue. For example,
consider we have requests A, B and B', where B & B' are a bonded pair to
run in parallel on two engines.

	A -> B
     	     \- B'

B will run after A and so may be scheduled on an idle engine and wait on
A using a semaphore. B' sees B being executed and so enters the queue on
the same engine as A. As B' did not inherit the semaphore-chain from B,
it may have higher precedence than A and so preempts execution. However,
B' then sits on a semaphore waiting for B, who is waiting for A, who is
blocked by B.

Ergo B' needs to inherit the scheduler properties from B (i.e. the
semaphore chain) so that it is scheduled with the same priority as B and
will not be executed ahead of B's dependencies.

Furthermore, to prevent the priorities changing via the exposed fence on
B', we need to couple in the dependencies for PI. This requires us to
relax our sanity-checks that dependencies are strictly in order.

Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
Testcase: igt/gem_exec_balancer/bonded-chain
Testcase: igt/gem_exec_balancer/bonded-semaphore
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
Due to deep ELSP submission, the bonded pair may be submitted long
before its master reaches ELSP[0] -- we need to wait in the pairs as we
are no longer relying on the user to do so.
---
 drivers/gpu/drm/i915/i915_request.c   | 115 ++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_scheduler.c |   1 -
 2 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3109b8a79b9f..b0f0cfef1eb1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 }
 
 static int
-__i915_request_await_execution(struct i915_request *rq,
-			       struct i915_request *signal,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal),
-			       gfp_t gfp)
+__await_execution(struct i915_request *rq,
+		  struct i915_request *signal,
+		  void (*hook)(struct i915_request *rq,
+			       struct dma_fence *signal),
+		  gfp_t gfp)
 {
 	struct execute_cb *cb;
 
@@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
 	}
 	spin_unlock_irq(&signal->lock);
 
+	/* Copy across semaphore status as we need the same behaviour */
+	rq->sched.flags |= signal->sched.flags;
 	return 0;
 }
 
@@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
 }
 
 static int
-emit_semaphore_wait(struct i915_request *to,
-		    struct i915_request *from,
-		    gfp_t gfp)
+__emit_semaphore_wait(struct i915_request *to,
+		      struct i915_request *from,
+		      u32 seqno)
 {
 	const int has_token = INTEL_GEN(to->i915) >= 12;
 	u32 hwsp_offset;
-	int len;
+	int len, err;
 	u32 *cs;
 
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
-	/* Just emit the first semaphore we see as request space is limited. */
-	if (already_busywaiting(to) & from->engine->mask)
-		goto await_fence;
-
-	if (i915_request_await_start(to, from) < 0)
-		goto await_fence;
-
-	/* Only submit our spinner after the signaler is running! */
-	if (__i915_request_await_execution(to, from, NULL, gfp))
-		goto await_fence;
-
 	/* We need to pin the signaler's HWSP until we are finished reading. */
-	if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
-		goto await_fence;
+	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
+	if (err)
+		return err;
 
 	len = 4;
 	if (has_token)
@@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
 		 MI_SEMAPHORE_POLL |
 		 MI_SEMAPHORE_SAD_GTE_SDD) +
 		has_token;
-	*cs++ = from->fence.seqno;
+	*cs++ = seqno;
 	*cs++ = hwsp_offset;
 	*cs++ = 0;
 	if (has_token) {
@@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
 	}
 
 	intel_ring_advance(to, cs);
+	return 0;
+}
+
+static int
+emit_semaphore_wait(struct i915_request *to,
+		    struct i915_request *from,
+		    gfp_t gfp)
+{
+	/* Just emit the first semaphore we see as request space is limited. */
+	if (already_busywaiting(to) & from->engine->mask)
+		goto await_fence;
+
+	if (i915_request_await_start(to, from) < 0)
+		goto await_fence;
+
+	/* Only submit our spinner after the signaler is running! */
+	if (__await_execution(to, from, NULL, gfp))
+		goto await_fence;
+
+	if (__emit_semaphore_wait(to, from, from->fence.seqno))
+		goto await_fence;
+
 	to->sched.semaphores |= from->engine->mask;
 	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 	return 0;
@@ -993,6 +1007,58 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
+static int
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
+{
+	int err;
+
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	if (!to->engine->schedule)
+		return 0;
+
+	/* Squash repeated depenendices to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
+		return 0;
+
+	/* Ensure both start together [after all semaphores in signal] */
+	if (intel_engine_has_semaphores(to->engine))
+		err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
+	else
+		err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/* Couple the dependency tree for PI on this exposed to->fence */
+	err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+	if (err < 0)
+		return err;
+
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 			     struct dma_fence *fence,
@@ -1026,8 +1092,7 @@ i915_request_await_execution(struct i915_request *rq,
 		if (dma_fence_is_i915(fence))
 			ret = __i915_request_await_execution(rq,
 							     to_request(fence),
-							     hook,
-							     I915_FENCE_GFP);
+							     hook);
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
 							    I915_FENCE_TIMEOUT,
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index ad6ff52bc316..00f5d107f2b7 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -492,7 +492,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 	 * so we may be called out-of-order.
 	 */
 	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!node_signaled(dep->signaler));
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
 		list_del(&dep->wait_link);
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
@ 2019-12-04 22:24 ` Chris Wilson
  2019-12-05  3:14 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Copy across scheduler behaviour flags across submit fences (rev4) Patchwork
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2019-12-04 22:24 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-12-04 21:17:03)
> We want the bonded request to have the same scheduler properties as its
> master so that it is placed at the same depth in the queue. For example,
> consider we have requests A, B and B', where B & B' are a bonded pair to
> run in parallel on two engines.
> 
>         A -> B
>              \- B'
> 
> B will run after A and so may be scheduled on an idle engine and wait on
> A using a semaphore. B' sees B being executed and so enters the queue on
> the same engine as A. As B' did not inherit the semaphore-chain from B,
> it may have higher precedence than A and so preempts execution. However,
> B' then sits on a semaphore waiting for B, who is waiting for A, who is
> blocked by B.
> 
> Ergo B' needs to inherit the scheduler properties from B (i.e. the
> semaphore chain) so that it is scheduled with the same priority as B and
> will not be executed ahead of Bs dependencies.
> 
> Furthermore, to prevent the priorities changing via the expose fence on
> B', we need to couple in the dependencies for PI. This requires us to
> relax our sanity-checks that dependencies are strictly in order.
> 
> Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
> Testcase: igt/gem_exec_balancer/bonded-chain
> Testcase: igt/gem_exec_balancer/bonded-semaphore
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Closes: https://gitlab.freedesktop.org/drm/intel/issues/464
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Copy across scheduler behaviour flags across submit fences (rev4)
  2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
  2019-12-04 22:24 ` Chris Wilson
@ 2019-12-05  3:14 ` Patchwork
  2019-12-05  3:42 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-12-05  3:14 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Copy across scheduler behaviour flags across submit fences (rev4)
URL   : https://patchwork.freedesktop.org/series/70107/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
9445907506b3 drm/i915: Copy across scheduler behaviour flags across submit fences
-:184: ERROR:SPACING: spaces required around that '=' (ctx:WxV)
#184: FILE: drivers/gpu/drm/i915/i915_request.c:1047:
+		err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
 		    ^

total: 1 errors, 0 warnings, 0 checks, 172 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Copy across scheduler behaviour flags across submit fences (rev4)
  2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
  2019-12-04 22:24 ` Chris Wilson
  2019-12-05  3:14 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Copy across scheduler behaviour flags across submit fences (rev4) Patchwork
@ 2019-12-05  3:42 ` Patchwork
  2019-12-10 12:41 ` [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Tvrtko Ursulin
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-12-05  3:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Copy across scheduler behaviour flags across submit fences (rev4)
URL   : https://patchwork.freedesktop.org/series/70107/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7486 -> Patchwork_15592
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/index.html

Known issues
------------

  Here are the changes found in Patchwork_15592 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_blt:
    - fi-ivb-3770:        [PASS][1] -> [DMESG-FAIL][2] ([i915#563])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-ivb-3770/igt@i915_selftest@live_blt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-ivb-3770/igt@i915_selftest@live_blt.html

  * igt@i915_selftest@live_execlists:
    - fi-skl-6600u:       [PASS][3] -> [INCOMPLETE][4] ([i915#529])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-skl-6600u/igt@i915_selftest@live_execlists.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-skl-6600u/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-hsw-peppy:       [PASS][5] -> [DMESG-FAIL][6] ([fdo#111692])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-hsw-peppy/igt@i915_selftest@live_gem_contexts.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-hsw-peppy/igt@i915_selftest@live_gem_contexts.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770:        [DMESG-FAIL][7] ([i915#683]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-hsw-4770/igt@i915_selftest@live_blt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-hsw-4770/igt@i915_selftest@live_blt.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-hsw-peppy:       [DMESG-WARN][9] ([i915#44]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html

  
#### Warnings ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-kbl-x1275:       [DMESG-WARN][11] ([fdo#107139] / [i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][12] ([fdo#107139] / [i915#62] / [i915#92])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-kbl-x1275/igt@gem_exec_suspend@basic-s4-devices.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-kbl-x1275/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770r:       [DMESG-FAIL][13] ([i915#683]) -> [DMESG-FAIL][14] ([i915#563])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-hsw-4770r/igt@i915_selftest@live_blt.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-hsw-4770r/igt@i915_selftest@live_blt.html

  * igt@kms_busy@basic-flip-pipe-b:
    - fi-kbl-x1275:       [DMESG-WARN][15] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][16] ([i915#62] / [i915#92]) +6 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-kbl-x1275/igt@kms_busy@basic-flip-pipe-b.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-kbl-x1275/igt@kms_busy@basic-flip-pipe-b.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-kbl-x1275:       [DMESG-WARN][17] ([i915#62] / [i915#92]) -> [DMESG-WARN][18] ([i915#62] / [i915#92] / [i915#95]) +5 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7486/fi-kbl-x1275/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/fi-kbl-x1275/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#107139]: https://bugs.freedesktop.org/show_bug.cgi?id=107139
  [fdo#111692]: https://bugs.freedesktop.org/show_bug.cgi?id=111692
  [fdo#111736]: https://bugs.freedesktop.org/show_bug.cgi?id=111736
  [i915#44]: https://gitlab.freedesktop.org/drm/intel/issues/44
  [i915#460]: https://gitlab.freedesktop.org/drm/intel/issues/460
  [i915#476]: https://gitlab.freedesktop.org/drm/intel/issues/476
  [i915#529]: https://gitlab.freedesktop.org/drm/intel/issues/529
  [i915#563]: https://gitlab.freedesktop.org/drm/intel/issues/563
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#683]: https://gitlab.freedesktop.org/drm/intel/issues/683
  [i915#710]: https://gitlab.freedesktop.org/drm/intel/issues/710
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (53 -> 47)
------------------------------

  Additional (1): fi-cfl-8109u 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7486 -> Patchwork_15592

  CI-20190529: 20190529
  CI_DRM_7486: 8bd05bdd30fdd1b56307b780ab0f0e8de878c75a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5331: dad473697b2e6bb5c45d7fec533b20d5dbe4fa17 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_15592: 9445907506b333e31f07ce973c411cd841f0199e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9445907506b3 drm/i915: Copy across scheduler behaviour flags across submit fences

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15592/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
                   ` (2 preceding siblings ...)
  2019-12-05  3:42 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2019-12-10 12:41 ` Tvrtko Ursulin
  2019-12-10 13:06   ` Chris Wilson
  2019-12-10 15:13 ` [Intel-gfx] [PATCH v5] " Chris Wilson
  2019-12-11  1:54 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Copy across scheduler behaviour flags across submit fences (rev5) Patchwork
  5 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2019-12-10 12:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 04/12/2019 21:17, Chris Wilson wrote:
> We want the bonded request to have the same scheduler properties as its
> master so that it is placed at the same depth in the queue. For example,
> consider we have requests A, B and B', where B & B' are a bonded pair to
> run in parallel on two engines.
> 
> 	A -> B
>       	     \- B'
> 
> B will run after A and so may be scheduled on an idle engine and wait on
> A using a semaphore. B' sees B being executed and so enters the queue on
> the same engine as A. As B' did not inherit the semaphore-chain from B,
> it may have higher precedence than A and so preempts execution. However,
> B' then sits on a semaphore waiting for B, who is waiting for A, who is
> blocked by B.
> 
> Ergo B' needs to inherit the scheduler properties from B (i.e. the
> semaphore chain) so that it is scheduled with the same priority as B and
> will not be executed ahead of Bs dependencies.

It makes sense in general to inherit, although in this example why would 
B' not be preempted by A, once it starts blocking on the semaphore? I am 
thinking more and more we should not have priorities imply strict order.

> 
> Furthermore, to prevent the priorities changing via the expose fence on
> B', we need to couple in the dependencies for PI. This requires us to
> relax our sanity-checks that dependencies are strictly in order.
> 
> Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
> Testcase: igt/gem_exec_balancer/bonded-chain
> Testcase: igt/gem_exec_balancer/bonded-semaphore
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> Due to deep ELSP submission, the bonded pair may be submitted long
> before its master reaches ELSP[0] -- we need to wait in the pairs as we
> are no longer relying on the user to do so.
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 115 ++++++++++++++++++++------
>   drivers/gpu/drm/i915/i915_scheduler.c |   1 -
>   2 files changed, 90 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 3109b8a79b9f..b0f0cfef1eb1 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
>   }
>   
>   static int
> -__i915_request_await_execution(struct i915_request *rq,
> -			       struct i915_request *signal,
> -			       void (*hook)(struct i915_request *rq,
> -					    struct dma_fence *signal),
> -			       gfp_t gfp)
> +__await_execution(struct i915_request *rq,
> +		  struct i915_request *signal,
> +		  void (*hook)(struct i915_request *rq,
> +			       struct dma_fence *signal),
> +		  gfp_t gfp)
>   {
>   	struct execute_cb *cb;
>   
> @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
>   	}
>   	spin_unlock_irq(&signal->lock);
>   
> +	/* Copy across semaphore status as we need the same behaviour */
> +	rq->sched.flags |= signal->sched.flags;
>   	return 0;
>   }
>   
> @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
>   }
>   
>   static int
> -emit_semaphore_wait(struct i915_request *to,
> -		    struct i915_request *from,
> -		    gfp_t gfp)
> +__emit_semaphore_wait(struct i915_request *to,
> +		      struct i915_request *from,
> +		      u32 seqno)
>   {
>   	const int has_token = INTEL_GEN(to->i915) >= 12;
>   	u32 hwsp_offset;
> -	int len;
> +	int len, err;
>   	u32 *cs;
>   
>   	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
>   
> -	/* Just emit the first semaphore we see as request space is limited. */
> -	if (already_busywaiting(to) & from->engine->mask)
> -		goto await_fence;
> -
> -	if (i915_request_await_start(to, from) < 0)
> -		goto await_fence;
> -
> -	/* Only submit our spinner after the signaler is running! */
> -	if (__i915_request_await_execution(to, from, NULL, gfp))
> -		goto await_fence;
> -
>   	/* We need to pin the signaler's HWSP until we are finished reading. */
> -	if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
> -		goto await_fence;
> +	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
> +	if (err)
> +		return err;
>   
>   	len = 4;
>   	if (has_token)
> @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
>   		 MI_SEMAPHORE_POLL |
>   		 MI_SEMAPHORE_SAD_GTE_SDD) +
>   		has_token;
> -	*cs++ = from->fence.seqno;
> +	*cs++ = seqno;
>   	*cs++ = hwsp_offset;
>   	*cs++ = 0;
>   	if (has_token) {
> @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
>   	}
>   
>   	intel_ring_advance(to, cs);
> +	return 0;
> +}
> +
> +static int
> +emit_semaphore_wait(struct i915_request *to,
> +		    struct i915_request *from,
> +		    gfp_t gfp)
> +{
> +	/* Just emit the first semaphore we see as request space is limited. */
> +	if (already_busywaiting(to) & from->engine->mask)
> +		goto await_fence;
> +
> +	if (i915_request_await_start(to, from) < 0)
> +		goto await_fence;
> +
> +	/* Only submit our spinner after the signaler is running! */
> +	if (__await_execution(to, from, NULL, gfp))
> +		goto await_fence;
> +
> +	if (__emit_semaphore_wait(to, from, from->fence.seqno))
> +		goto await_fence;
> +
>   	to->sched.semaphores |= from->engine->mask;
>   	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
>   	return 0;
> @@ -993,6 +1007,58 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   	return 0;
>   }
>   
> +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> +					  struct dma_fence *fence)
> +{
> +	return __intel_timeline_sync_is_later(tl,
> +					      fence->context,
> +					      fence->seqno - 1);
> +}
> +
> +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> +					 const struct dma_fence *fence)
> +{
> +	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> +}
> +
> +static int
> +__i915_request_await_execution(struct i915_request *to,
> +			       struct i915_request *from,
> +			       void (*hook)(struct i915_request *rq,
> +					    struct dma_fence *signal))
> +{
> +	int err;
> +
> +	/* Submit both requests at the same time */
> +	err = __await_execution(to, from, hook, I915_FENCE_GFP);
> +	if (err)
> +		return err;
> +
> +	if (!to->engine->schedule)
> +		return 0;

Hm is this about scheduler or preemption?

> +
> +	/* Squash repeated depenendices to the same timelines */

typo in dependencies

> +	if (intel_timeline_sync_has_start(i915_request_timeline(to),
> +					  &from->fence))
> +		return 0;
> +
> +	/* Ensure both start together [after all semaphores in signal] */
> +	if (intel_engine_has_semaphores(to->engine))
> +		err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);

So the only thing preventing B' getting onto the same engine as A, as 
soon as B enters a different engine, is the priority order?

And if I am reading this correctly, change relative to current state is 
to let B' in, but spin on a semaphore, where currently we let it execute 
the actual payload.

It's possible that we do need this if we said we would guarantee the bonded 
request would not execute before its master. Have we made this 
guarantee when discussing the uAPI? We must have..

But with no semaphores i915_request_await_start can not offer this hard 
guarantee which then sounds like a problem. Do we need to only allow 
bonding where we have semaphores?

> +	else
> +		err = i915_request_await_start(to, from);
> +	if (err < 0)
> +		return err;
> +
> +	/* Couple the dependency tree for PI on this exposed to->fence */
> +	err = i915_sched_node_add_dependency(&to->sched, &from->sched);
> +	if (err < 0)
> +		return err;
> +
> +	return intel_timeline_sync_set_start(i915_request_timeline(to),
> +					     &from->fence);
> +}
> +
>   int
>   i915_request_await_execution(struct i915_request *rq,
>   			     struct dma_fence *fence,
> @@ -1026,8 +1092,7 @@ i915_request_await_execution(struct i915_request *rq,
>   		if (dma_fence_is_i915(fence))
>   			ret = __i915_request_await_execution(rq,
>   							     to_request(fence),
> -							     hook,
> -							     I915_FENCE_GFP);
> +							     hook);
>   		else
>   			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
>   							    I915_FENCE_TIMEOUT,
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index ad6ff52bc316..00f5d107f2b7 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -492,7 +492,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>   	 * so we may be called out-of-order.
>   	 */
>   	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
> -		GEM_BUG_ON(!node_signaled(dep->signaler));
>   		GEM_BUG_ON(!list_empty(&dep->dfs_link));
>   
>   		list_del(&dep->wait_link);
> 

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-10 12:41 ` [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Tvrtko Ursulin
@ 2019-12-10 13:06   ` Chris Wilson
  2019-12-10 14:12     ` Tvrtko Ursulin
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2019-12-10 13:06 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-12-10 12:41:36)
> 
> On 04/12/2019 21:17, Chris Wilson wrote:
> > We want the bonded request to have the same scheduler properties as its
> > master so that it is placed at the same depth in the queue. For example,
> > consider we have requests A, B and B', where B & B' are a bonded pair to
> > run in parallel on two engines.
> > 
> >       A -> B
> >                    \- B'
> > 
> > B will run after A and so may be scheduled on an idle engine and wait on
> > A using a semaphore. B' sees B being executed and so enters the queue on
> > the same engine as A. As B' did not inherit the semaphore-chain from B,
> > it may have higher precedence than A and so preempts execution. However,
> > B' then sits on a semaphore waiting for B, who is waiting for A, who is
> > blocked by B.
> > 
> > Ergo B' needs to inherit the scheduler properties from B (i.e. the
> > semaphore chain) so that it is scheduled with the same priority as B and
> > will not be executed ahead of Bs dependencies.
> 
> It makes sense in general to inherit, although in this example why would 
> B' not be preempted by A, once it starts blocking on the semaphore? I am 
> thinking more and more we should not have priorities imply strict order.

Even if we model ourselves after CFS, we still have exceptional
schedulers like RR or DEADLINE. So priority inversion will remain an
issue, and the way we are tackling that is by tracking dependencies.

And which semaphore do you mean? Ours or userspace? Both are ultimately
effectively countered by timeslicing, the question is how to decide when
to slice and how to order slices.

Anyway the old joke about this being a 'prescheduler' still applies -- we
don't yet have a scheduler worthy of the name.

> > Furthermore, to prevent the priorities changing via the expose fence on
> > B', we need to couple in the dependencies for PI. This requires us to
> > relax our sanity-checks that dependencies are strictly in order.
> > 
> > Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
> > Testcase: igt/gem_exec_balancer/bonded-chain
> > Testcase: igt/gem_exec_balancer/bonded-semaphore
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> > Due to deep ELSP submission, the bonded pair may be submitted long
> > before its master reaches ELSP[0] -- we need to wait in the pairs as we
> > are no longer relying on the user to do so.
> > ---
> >   drivers/gpu/drm/i915/i915_request.c   | 115 ++++++++++++++++++++------
> >   drivers/gpu/drm/i915/i915_scheduler.c |   1 -
> >   2 files changed, 90 insertions(+), 26 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 3109b8a79b9f..b0f0cfef1eb1 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
> >   }
> >   
> >   static int
> > -__i915_request_await_execution(struct i915_request *rq,
> > -                            struct i915_request *signal,
> > -                            void (*hook)(struct i915_request *rq,
> > -                                         struct dma_fence *signal),
> > -                            gfp_t gfp)
> > +__await_execution(struct i915_request *rq,
> > +               struct i915_request *signal,
> > +               void (*hook)(struct i915_request *rq,
> > +                            struct dma_fence *signal),
> > +               gfp_t gfp)
> >   {
> >       struct execute_cb *cb;
> >   
> > @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
> >       }
> >       spin_unlock_irq(&signal->lock);
> >   
> > +     /* Copy across semaphore status as we need the same behaviour */
> > +     rq->sched.flags |= signal->sched.flags;
> >       return 0;
> >   }
> >   
> > @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
> >   }
> >   
> >   static int
> > -emit_semaphore_wait(struct i915_request *to,
> > -                 struct i915_request *from,
> > -                 gfp_t gfp)
> > +__emit_semaphore_wait(struct i915_request *to,
> > +                   struct i915_request *from,
> > +                   u32 seqno)
> >   {
> >       const int has_token = INTEL_GEN(to->i915) >= 12;
> >       u32 hwsp_offset;
> > -     int len;
> > +     int len, err;
> >       u32 *cs;
> >   
> >       GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
> >   
> > -     /* Just emit the first semaphore we see as request space is limited. */
> > -     if (already_busywaiting(to) & from->engine->mask)
> > -             goto await_fence;
> > -
> > -     if (i915_request_await_start(to, from) < 0)
> > -             goto await_fence;
> > -
> > -     /* Only submit our spinner after the signaler is running! */
> > -     if (__i915_request_await_execution(to, from, NULL, gfp))
> > -             goto await_fence;
> > -
> >       /* We need to pin the signaler's HWSP until we are finished reading. */
> > -     if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
> > -             goto await_fence;
> > +     err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
> > +     if (err)
> > +             return err;
> >   
> >       len = 4;
> >       if (has_token)
> > @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
> >                MI_SEMAPHORE_POLL |
> >                MI_SEMAPHORE_SAD_GTE_SDD) +
> >               has_token;
> > -     *cs++ = from->fence.seqno;
> > +     *cs++ = seqno;
> >       *cs++ = hwsp_offset;
> >       *cs++ = 0;
> >       if (has_token) {
> > @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
> >       }
> >   
> >       intel_ring_advance(to, cs);
> > +     return 0;
> > +}
> > +
> > +static int
> > +emit_semaphore_wait(struct i915_request *to,
> > +                 struct i915_request *from,
> > +                 gfp_t gfp)
> > +{
> > +     /* Just emit the first semaphore we see as request space is limited. */
> > +     if (already_busywaiting(to) & from->engine->mask)
> > +             goto await_fence;
> > +
> > +     if (i915_request_await_start(to, from) < 0)
> > +             goto await_fence;
> > +
> > +     /* Only submit our spinner after the signaler is running! */
> > +     if (__await_execution(to, from, NULL, gfp))
> > +             goto await_fence;
> > +
> > +     if (__emit_semaphore_wait(to, from, from->fence.seqno))
> > +             goto await_fence;
> > +
> >       to->sched.semaphores |= from->engine->mask;
> >       to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
> >       return 0;
> > @@ -993,6 +1007,58 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
> >       return 0;
> >   }
> >   
> > +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> > +                                       struct dma_fence *fence)
> > +{
> > +     return __intel_timeline_sync_is_later(tl,
> > +                                           fence->context,
> > +                                           fence->seqno - 1);
> > +}
> > +
> > +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> > +                                      const struct dma_fence *fence)
> > +{
> > +     return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> > +}
> > +
> > +static int
> > +__i915_request_await_execution(struct i915_request *to,
> > +                            struct i915_request *from,
> > +                            void (*hook)(struct i915_request *rq,
> > +                                         struct dma_fence *signal))
> > +{
> > +     int err;
> > +
> > +     /* Submit both requests at the same time */
> > +     err = __await_execution(to, from, hook, I915_FENCE_GFP);
> > +     if (err)
> > +             return err;
> > +
> > +     if (!to->engine->schedule)
> > +             return 0;
> 
> Hm is this about scheduler or preemption?

It's about dependency tracking, and the lack of it.
 
> > +
> > +     /* Squash repeated depenendices to the same timelines */
> 
> typo in dependencies
> 
> > +     if (intel_timeline_sync_has_start(i915_request_timeline(to),
> > +                                       &from->fence))
> > +             return 0;
> > +
> > +     /* Ensure both start together [after all semaphores in signal] */
> > +     if (intel_engine_has_semaphores(to->engine))
> > +             err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
> 
> So the only thing preventing B' getting onto the same engine as A, as 
> soon as B enters a different engine, is the priority order?

No. Now B' has a dependency on A, so B' is always after A.
 
> And if I am reading this correctly, change relative to current state is 
> to let B' in, but spin on a semaphore, where currently we let it execute 
> the actual payload.
> 
> It's possible that we do need this if we said we would guarantee bonded 
> request would not execute before its master. Have we made this 
> guarantee when discussing the uAPI? We must have..

We did not make that guarantee as the assumption was that all fences for
B would be passed to B'. However, the single fence slot for IN/SUBMIT
precludes that (I was thinking you could just merge them, but the
interpretation of the merged fence would still be from the IN/SUBMIT
flag), and we haven't extended syncobj yet.

We can completely invalidate that argument by the simple use of an
FENCE_OUT for B'. That gives us a massive hole in the PI tree, and
userspace deadlocks galore.
 
> But with no semaphores i915_request_await_start can not offer this hard 
> guarantee which then sounds like a problem. Do we need to only allow 
> bonding where we have semaphores?

No. If we don't have semaphores, then we aren't using semaphores in B.
So then B and B' are scheduled to start the payload at the same time,
since they have the same fences. Coordination of the payload itself is
beyond our control -- we've just made sure that all dependencies are met
before the payload began.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-10 13:06   ` Chris Wilson
@ 2019-12-10 14:12     ` Tvrtko Ursulin
  2019-12-10 14:29       ` Chris Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2019-12-10 14:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 10/12/2019 13:06, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-10 12:41:36)
>>
>> On 04/12/2019 21:17, Chris Wilson wrote:
>>> We want the bonded request to have the same scheduler properties as its
>>> master so that it is placed at the same depth in the queue. For example,
>>> consider we have requests A, B and B', where B & B' are a bonded pair to
>>> run in parallel on two engines.
>>>
>>>        A -> B
>>>                     \- B'
>>>
>>> B will run after A and so may be scheduled on an idle engine and wait on
>>> A using a semaphore. B' sees B being executed and so enters the queue on
>>> the same engine as A. As B' did not inherit the semaphore-chain from B,
>>> it may have higher precedence than A and so preempts execution. However,
>>> B' then sits on a semaphore waiting for B, who is waiting for A, who is
>>> blocked by B.
>>>
>>> Ergo B' needs to inherit the scheduler properties from B (i.e. the
>>> semaphore chain) so that it is scheduled with the same priority as B and
>>> will not be executed ahead of Bs dependencies.
>>
>> It makes sense in general to inherit, although in this example why would
>> B' not be preempted by A, once it starts blocking on the semaphore? I am
>> thinking more and more we should not have priorities imply strict order.
> 
> Even if we model ourselves after CFS, we still have exceptional
> schedulers like RR or DEADLINE. So priority inversion will remain an
> issue, and the way we are tackling that is by tracking dependencies.
> 
> And which semaphore do you mean? Ours or userspace? Both are ultimately
> effectively countered by timeslicing, the question is how to decide when
> to slice and how to order slices.

I was thinking about our semaphore. Then the fact we would never shuffle 
contexts around, but keep priority order would prevent A ever preempting 
B' again. Being how they are both blocked, if we round-robin them they 
would eventually progress. But yeah, it's still of course infinitely 
better to track dependencies and execute things in efficient order.

> Anyway the old joke about this being a 'prescheduler' still applies -- we
> don't yet have a scheduler worthy of the name.
> 
>>> Furthermore, to prevent the priorities changing via the expose fence on
>>> B', we need to couple in the dependencies for PI. This requires us to
>>> relax our sanity-checks that dependencies are strictly in order.
>>>
>>> Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
>>> Testcase: igt/gem_exec_balancer/bonded-chain
>>> Testcase: igt/gem_exec_balancer/bonded-semaphore
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>> Due to deep ELSP submission, the bonded pair may be submitted long
>>> before its master reaches ELSP[0] -- we need to wait in the pairs as we
>>> are no longer relying on the user to do so.
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c   | 115 ++++++++++++++++++++------
>>>    drivers/gpu/drm/i915/i915_scheduler.c |   1 -
>>>    2 files changed, 90 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 3109b8a79b9f..b0f0cfef1eb1 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
>>>    }
>>>    
>>>    static int
>>> -__i915_request_await_execution(struct i915_request *rq,
>>> -                            struct i915_request *signal,
>>> -                            void (*hook)(struct i915_request *rq,
>>> -                                         struct dma_fence *signal),
>>> -                            gfp_t gfp)
>>> +__await_execution(struct i915_request *rq,
>>> +               struct i915_request *signal,
>>> +               void (*hook)(struct i915_request *rq,
>>> +                            struct dma_fence *signal),
>>> +               gfp_t gfp)
>>>    {
>>>        struct execute_cb *cb;
>>>    
>>> @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
>>>        }
>>>        spin_unlock_irq(&signal->lock);
>>>    
>>> +     /* Copy across semaphore status as we need the same behaviour */
>>> +     rq->sched.flags |= signal->sched.flags;
>>>        return 0;
>>>    }
>>>    
>>> @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
>>>    }
>>>    
>>>    static int
>>> -emit_semaphore_wait(struct i915_request *to,
>>> -                 struct i915_request *from,
>>> -                 gfp_t gfp)
>>> +__emit_semaphore_wait(struct i915_request *to,
>>> +                   struct i915_request *from,
>>> +                   u32 seqno)
>>>    {
>>>        const int has_token = INTEL_GEN(to->i915) >= 12;
>>>        u32 hwsp_offset;
>>> -     int len;
>>> +     int len, err;
>>>        u32 *cs;
>>>    
>>>        GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
>>>    
>>> -     /* Just emit the first semaphore we see as request space is limited. */
>>> -     if (already_busywaiting(to) & from->engine->mask)
>>> -             goto await_fence;
>>> -
>>> -     if (i915_request_await_start(to, from) < 0)
>>> -             goto await_fence;
>>> -
>>> -     /* Only submit our spinner after the signaler is running! */
>>> -     if (__i915_request_await_execution(to, from, NULL, gfp))
>>> -             goto await_fence;
>>> -
>>>        /* We need to pin the signaler's HWSP until we are finished reading. */
>>> -     if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
>>> -             goto await_fence;
>>> +     err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
>>> +     if (err)
>>> +             return err;
>>>    
>>>        len = 4;
>>>        if (has_token)
>>> @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
>>>                 MI_SEMAPHORE_POLL |
>>>                 MI_SEMAPHORE_SAD_GTE_SDD) +
>>>                has_token;
>>> -     *cs++ = from->fence.seqno;
>>> +     *cs++ = seqno;
>>>        *cs++ = hwsp_offset;
>>>        *cs++ = 0;
>>>        if (has_token) {
>>> @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
>>>        }
>>>    
>>>        intel_ring_advance(to, cs);
>>> +     return 0;
>>> +}
>>> +
>>> +static int
>>> +emit_semaphore_wait(struct i915_request *to,
>>> +                 struct i915_request *from,
>>> +                 gfp_t gfp)
>>> +{
>>> +     /* Just emit the first semaphore we see as request space is limited. */
>>> +     if (already_busywaiting(to) & from->engine->mask)
>>> +             goto await_fence;
>>> +
>>> +     if (i915_request_await_start(to, from) < 0)
>>> +             goto await_fence;
>>> +
>>> +     /* Only submit our spinner after the signaler is running! */
>>> +     if (__await_execution(to, from, NULL, gfp))
>>> +             goto await_fence;
>>> +
>>> +     if (__emit_semaphore_wait(to, from, from->fence.seqno))
>>> +             goto await_fence;
>>> +
>>>        to->sched.semaphores |= from->engine->mask;
>>>        to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
>>>        return 0;
>>> @@ -993,6 +1007,58 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>>>        return 0;
>>>    }
>>>    
>>> +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
>>> +                                       struct dma_fence *fence)
>>> +{
>>> +     return __intel_timeline_sync_is_later(tl,
>>> +                                           fence->context,
>>> +                                           fence->seqno - 1);
>>> +}
>>> +
>>> +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
>>> +                                      const struct dma_fence *fence)
>>> +{
>>> +     return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
>>> +}
>>> +
>>> +static int
>>> +__i915_request_await_execution(struct i915_request *to,
>>> +                            struct i915_request *from,
>>> +                            void (*hook)(struct i915_request *rq,
>>> +                                         struct dma_fence *signal))
>>> +{
>>> +     int err;
>>> +
>>> +     /* Submit both requests at the same time */
>>> +     err = __await_execution(to, from, hook, I915_FENCE_GFP);
>>> +     if (err)
>>> +             return err;
>>> +
>>> +     if (!to->engine->schedule)
>>> +             return 0;
>>
>> Hm is this about scheduler or preemption?
> 
> It's about dependency tracking, and the lack of it.
>   
>>> +
>>> +     /* Squash repeated depenendices to the same timelines */
>>
>> typo in dependencies
>>
>>> +     if (intel_timeline_sync_has_start(i915_request_timeline(to),
>>> +                                       &from->fence))
>>> +             return 0;
>>> +
>>> +     /* Ensure both start together [after all semaphores in signal] */
>>> +     if (intel_engine_has_semaphores(to->engine))
>>> +             err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
>>
>> So the only thing preventing B' getting onto the same engine as A, as
>> soon as B enters a different engine, is the priority order?
> 
> No. Now B' has a dependency on A, so B' is always after A.

Yes, true.

>> And if I am reading this correctly, change relative to current state is
>> to let B' in, but spin on a semaphore, where currently we let it execute
>> the actual payload.
>>
>> It's possible that we do need this if we said we would guarantee bonded 
>> request would not execute before its master. Have we made this 
>> guarantee when discussing the uAPI? We must have..
> 
> We did not make that guarantee as the assumption was that all fences for
> B would be passed to B'. However, the since fence slot for IN/SUBMIT

I think we must have made a guarantee B' won't start executing before B. 
That is kind of the central point. Only thing we did not do is 
guarantee/made effort to start B' together with B. But the guarantee got 
defeated by ELSP and later timeslicing/preemption.

Previously, miss was if B was in ELSP[1] B' could be put in ELSP[0] 
(different engines). Which is wrong. And with timeslicing/preemption 
even more so.

So having await_started or semaphore looks correct in that respect. And 
scheduler deps cover the A in chain. So I think it's good with this patch.

> precludes that (I was thinking you could just merge them, but the
> interpretation of the merged fence would still be from the IN/SUBMIT
> flag), and we haven't extended syncobj yet.
> 
> We can completely invalidate that argument by the simple use of an
> FENCE_OUT for B'. That gives us a massive hole in the PI tree, and
> userspace deadlocks galore.
>   
>> But with no semaphores i915_request_await_start can not offer this hard
>> guarantee which then sounds like a problem. Do we need to only allow
>> bonding where we have semaphores?
> 
> No. If we don't have semaphores, then we aren't using semaphores in B.
> So then B and B' are scheduled to start the payload at the same time,
> since they have the same fences. Coordination of the payload itself is
> beyond our control -- we've just made sure that all dependencies are met
> before the payload began.

Yes, without semaphores await_start is a submit fence, I missed that.

Regards,

Tvrtko



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-10 14:12     ` Tvrtko Ursulin
@ 2019-12-10 14:29       ` Chris Wilson
  2019-12-10 15:04         ` Tvrtko Ursulin
  0 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2019-12-10 14:29 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-12-10 14:12:31)
> 
> On 10/12/2019 13:06, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-12-10 12:41:36)
> >>
> >> On 04/12/2019 21:17, Chris Wilson wrote:
> >>> +static int
> >>> +__i915_request_await_execution(struct i915_request *to,
> >>> +                            struct i915_request *from,
> >>> +                            void (*hook)(struct i915_request *rq,
> >>> +                                         struct dma_fence *signal))
> >>> +{
> >>> +     int err;
> >>> +
> >>> +     /* Submit both requests at the same time */
> >>> +     err = __await_execution(to, from, hook, I915_FENCE_GFP);
> >>> +     if (err)
> >>> +             return err;
> >>> +
> >>> +     if (!to->engine->schedule)
> >>> +             return 0;
> >>
> >> Hm is this about scheduler or preemption?
> > 
> > It's about dependency tracking, and the lack of it.
> >   
> >>> +
> >>> +     /* Squash repeated depenendices to the same timelines */
> >>
> >> typo in dependencies
> >>
> >>> +     if (intel_timeline_sync_has_start(i915_request_timeline(to),
> >>> +                                       &from->fence))
> >>> +             return 0;
> >>> +
> >>> +     /* Ensure both start together [after all semaphores in signal] */
> >>> +     if (intel_engine_has_semaphores(to->engine))
> >>> +             err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
> >>
> >> So the only thing preventing B' getting onto the same engine as A, as
> >> soon as B enters a different engine, is the priority order?
> > 
> > No. Now B' has a dependency on A, so B' is always after A.
> 
> Yes, true.
> 
> >> And if I am reading this correctly, change relative to current state is
> >> to let B' in, but spin on a semaphore, where currently we let it execute
> >> the actual payload.
> >>
> >> It's possible that we do need this if we said we would guarantee bonded
> >> request would not execute before it's master. Have we made this
> >> guarantee when discussing the uAPI? We must had..
> > 
> > We did not make that guarantee as the assumption was that all fences for
> > B would be passed to B'. However, the since fence slot for IN/SUBMIT
> 
> I think we must have made a guarantee B' won't start executing before B. 
> That is kind of the central point. Only thing we did not do is 
> guarantee/made effort to start B' together with B. But guarantee got 
> defeated by ELSP and later timeslicing/preemption.

According to the implementation, we did not ;)

I agree that the stronger coordination makes more sense for the API, and
the inheritance of dependencies I think is crucial for exported fences.
 
> Previously, miss was if B was in ELSP[1] B' could be put in ELSP[0] 
> (different engines). Which is wrong. And with timeslicing/preemption 
> even more so.

It wasn't actually an oversight :) My understanding was that B' payload
would not start before B payload via user semaphores, so I wrote it off
as a bad bubble.

The oversight, imo, is that we shouldn't rely on userspace for this and
with the current implementation it is easy to lose PI.
 
> So having await_started or semaphore looks correct in that respect. And 
> scheduler deps cover the A in chain. So I think it's good with this patch.

Agreed.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-10 14:29       ` Chris Wilson
@ 2019-12-10 15:04         ` Tvrtko Ursulin
  0 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2019-12-10 15:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 10/12/2019 14:29, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-12-10 14:12:31)
>>
>> On 10/12/2019 13:06, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-12-10 12:41:36)
>>>>
>>>> On 04/12/2019 21:17, Chris Wilson wrote:
>>>>> +static int
>>>>> +__i915_request_await_execution(struct i915_request *to,
>>>>> +                            struct i915_request *from,
>>>>> +                            void (*hook)(struct i915_request *rq,
>>>>> +                                         struct dma_fence *signal))
>>>>> +{
>>>>> +     int err;
>>>>> +
>>>>> +     /* Submit both requests at the same time */
>>>>> +     err = __await_execution(to, from, hook, I915_FENCE_GFP);
>>>>> +     if (err)
>>>>> +             return err;
>>>>> +
>>>>> +     if (!to->engine->schedule)
>>>>> +             return 0;
>>>>
>>>> Hm is this about scheduler or preemption?
>>>
>>> It's about dependency tracking, and the lack of it.
>>>    
>>>>> +
>>>>> +     /* Squash repeated depenendices to the same timelines */
>>>>
>>>> typo in dependencies
>>>>
>>>>> +     if (intel_timeline_sync_has_start(i915_request_timeline(to),
>>>>> +                                       &from->fence))
>>>>> +             return 0;
>>>>> +
>>>>> +     /* Ensure both start together [after all semaphores in signal] */
>>>>> +     if (intel_engine_has_semaphores(to->engine))
>>>>> +             err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
>>>>
>>>> So the only thing preventing B' getting onto the same engine as A, as
>>>> soon as B enters a different engine, is the priority order?
>>>
>>> No. Now B' has a dependency on A, so B' is always after A.
>>
>> Yes, true.
>>
>>>> And if I am reading this correctly, change relative to current state is
>>>> to let B' in, but spin on a semaphore, where currently we let it execute
>>>> the actual payload.
>>>>
>>>> It's possible that we do need this if we said we would guarantee bonded
>>>> request would not execute before it's master. Have we made this
>>>> guarantee when discussing the uAPI? We must had..
>>>
>>> We did not make that guarantee as the assumption was that all fences for
>>> B would be passed to B'. However, the since fence slot for IN/SUBMIT
>>
>> I think we must have made a guarantee B' won't start executing before B.
>> That is kind of the central point. Only thing we did not do is
>> guarantee/made effort to start B' together with B. But guarantee got
>> defeated by ELSP and later timeslicing/preemption.
> 
> According to the implementation, we did not ;)
> 
> I agree that the stronger coordination makes more sense for the API, and
> the inheritance of dependencies I think is crucial for exported fences.
>   
>> Previously, miss was if B was in ELSP[1] B' could be put in ELSP[0]
>> (different engines). Which is wrong. And with timeslicing/preemption
>> even more so.
> 
> It wasn't actually an oversight :) My understanding was that B' payload
> would not start before B payload via user semaphores, so I wrote it off
> as a bad bubble.
> 
> The oversight, imo, is that we shouldn't rely on userspace for this and
> with the current implementation it is easy to lose PI.
>   
>> So having await_started or semaphore looks correct in that respect. And
>> scheduler deps cover the A in chain. So I think it's good with this patch.
> 
> Agreed.

For correctness you don't think if !scheduler early return should happen 
after the semaphore and await_start insertion? I know that where we have 
ELSP we have scheduler so maybe it is moot point.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH v5] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
                   ` (3 preceding siblings ...)
  2019-12-10 12:41 ` [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Tvrtko Ursulin
@ 2019-12-10 15:13 ` Chris Wilson
  2019-12-10 15:15   ` Tvrtko Ursulin
  2019-12-11  1:54 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Copy across scheduler behaviour flags across submit fences (rev5) Patchwork
  5 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2019-12-10 15:13 UTC (permalink / raw)
  To: intel-gfx

We want the bonded request to have the same scheduler properties as its
master so that it is placed at the same depth in the queue. For example,
consider we have requests A, B and B', where B & B' are a bonded pair to
run in parallel on two engines.

	A -> B
     	     \- B'

B will run after A and so may be scheduled on an idle engine and wait on
A using a semaphore. B' sees B being executed and so enters the queue on
the same engine as A. As B' did not inherit the semaphore-chain from B,
it may have higher precedence than A and so preempts execution. However,
B' then sits on a semaphore waiting for B, who is waiting for A, who is
blocked by B.

Ergo B' needs to inherit the scheduler properties from B (i.e. the
semaphore chain) so that it is scheduled with the same priority as B and
will not be executed ahead of Bs dependencies.

Furthermore, to prevent the priorities changing via the expose fence on
B', we need to couple in the dependencies for PI. This requires us to
relax our sanity-checks that dependencies are strictly in order.

v2: Synchronise (B, B') execution on all platforms, regardless of using
a scheduler, any no-op syncs should be elided.

Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
Closes: https://gitlab.freedesktop.org/drm/intel/issues/464
Testcase: igt/gem_exec_balancer/bonded-chain
Testcase: igt/gem_exec_balancer/bonded-semaphore
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c   | 114 ++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_scheduler.c |   1 -
 2 files changed, 89 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ddc6c311349c..a6238c626a16 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
 }
 
 static int
-__i915_request_await_execution(struct i915_request *rq,
-			       struct i915_request *signal,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal),
-			       gfp_t gfp)
+__await_execution(struct i915_request *rq,
+		  struct i915_request *signal,
+		  void (*hook)(struct i915_request *rq,
+			       struct dma_fence *signal),
+		  gfp_t gfp)
 {
 	struct execute_cb *cb;
 
@@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
 	}
 	spin_unlock_irq(&signal->lock);
 
+	/* Copy across semaphore status as we need the same behaviour */
+	rq->sched.flags |= signal->sched.flags;
 	return 0;
 }
 
@@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
 }
 
 static int
-emit_semaphore_wait(struct i915_request *to,
-		    struct i915_request *from,
-		    gfp_t gfp)
+__emit_semaphore_wait(struct i915_request *to,
+		      struct i915_request *from,
+		      u32 seqno)
 {
 	const int has_token = INTEL_GEN(to->i915) >= 12;
 	u32 hwsp_offset;
-	int len;
+	int len, err;
 	u32 *cs;
 
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
-	/* Just emit the first semaphore we see as request space is limited. */
-	if (already_busywaiting(to) & from->engine->mask)
-		goto await_fence;
-
-	if (i915_request_await_start(to, from) < 0)
-		goto await_fence;
-
-	/* Only submit our spinner after the signaler is running! */
-	if (__i915_request_await_execution(to, from, NULL, gfp))
-		goto await_fence;
-
 	/* We need to pin the signaler's HWSP until we are finished reading. */
-	if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
-		goto await_fence;
+	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
+	if (err)
+		return err;
 
 	len = 4;
 	if (has_token)
@@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
 		 MI_SEMAPHORE_POLL |
 		 MI_SEMAPHORE_SAD_GTE_SDD) +
 		has_token;
-	*cs++ = from->fence.seqno;
+	*cs++ = seqno;
 	*cs++ = hwsp_offset;
 	*cs++ = 0;
 	if (has_token) {
@@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
 	}
 
 	intel_ring_advance(to, cs);
+	return 0;
+}
+
+static int
+emit_semaphore_wait(struct i915_request *to,
+		    struct i915_request *from,
+		    gfp_t gfp)
+{
+	/* Just emit the first semaphore we see as request space is limited. */
+	if (already_busywaiting(to) & from->engine->mask)
+		goto await_fence;
+
+	if (i915_request_await_start(to, from) < 0)
+		goto await_fence;
+
+	/* Only submit our spinner after the signaler is running! */
+	if (__await_execution(to, from, NULL, gfp))
+		goto await_fence;
+
+	if (__emit_semaphore_wait(to, from, from->fence.seqno))
+		goto await_fence;
+
 	to->sched.semaphores |= from->engine->mask;
 	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 	return 0;
@@ -995,6 +1009,57 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
+static int
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
+{
+	int err;
+
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	/* Squash repeated dependencies to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
+		return 0;
+
+	/* Ensure both start together [after all semaphores in signal] */
+	if (intel_engine_has_semaphores(to->engine))
+		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+	else
+		err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/* Couple the dependency tree for PI on this exposed to->fence */
+	if (to->engine->schedule) {
+		err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+		if (err < 0)
+			return err;
+	}
+
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 			     struct dma_fence *fence,
@@ -1030,8 +1095,7 @@ i915_request_await_execution(struct i915_request *rq,
 		if (dma_fence_is_i915(fence))
 			ret = __i915_request_await_execution(rq,
 							     to_request(fence),
-							     hook,
-							     I915_FENCE_GFP);
+							     hook);
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
 							    I915_FENCE_TIMEOUT,
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 1937a26d412f..2bc2aa46a1b9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -484,7 +484,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 	 * so we may be called out-of-order.
 	 */
 	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
-		GEM_BUG_ON(!node_signaled(dep->signaler));
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
 		list_del(&dep->wait_link);
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH v5] drm/i915: Copy across scheduler behaviour flags across submit fences
  2019-12-10 15:13 ` [Intel-gfx] [PATCH v5] " Chris Wilson
@ 2019-12-10 15:15   ` Tvrtko Ursulin
  0 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2019-12-10 15:15 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 10/12/2019 15:13, Chris Wilson wrote:
> We want the bonded request to have the same scheduler properties as its
> master so that it is placed at the same depth in the queue. For example,
> consider we have requests A, B and B', where B & B' are a bonded pair to
> run in parallel on two engines.
> 
> 	A -> B
>       	     \- B'
> 
> B will run after A and so may be scheduled on an idle engine and wait on
> A using a semaphore. B' sees B being executed and so enters the queue on
> the same engine as A. As B' did not inherit the semaphore-chain from B,
> it may have higher precedence than A and so preempts execution. However,
> B' then sits on a semaphore waiting for B, who is waiting for A, who is
> blocked by B'.
> 
> Ergo B' needs to inherit the scheduler properties from B (i.e. the
> semaphore chain) so that it is scheduled with the same priority as B and
> will not be executed ahead of B's dependencies.
> 
> Furthermore, to prevent the priorities changing via the exposed fence on
> B', we need to couple in the dependencies for PI. This requires us to
> relax our sanity-checks that dependencies are strictly in order.
> 
> v2: Synchronise (B, B') execution on all platforms, regardless of using
> a scheduler, any no-op syncs should be elided.
> 
> Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/464
> Testcase: igt/gem_exec_balancer/bonded-chain
> Testcase: igt/gem_exec_balancer/bonded-semaphore
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 114 ++++++++++++++++++++------
>   drivers/gpu/drm/i915/i915_scheduler.c |   1 -
>   2 files changed, 89 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index ddc6c311349c..a6238c626a16 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
>   }
>   
>   static int
> -__i915_request_await_execution(struct i915_request *rq,
> -			       struct i915_request *signal,
> -			       void (*hook)(struct i915_request *rq,
> -					    struct dma_fence *signal),
> -			       gfp_t gfp)
> +__await_execution(struct i915_request *rq,
> +		  struct i915_request *signal,
> +		  void (*hook)(struct i915_request *rq,
> +			       struct dma_fence *signal),
> +		  gfp_t gfp)
>   {
>   	struct execute_cb *cb;
>   
> @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
>   	}
>   	spin_unlock_irq(&signal->lock);
>   
> +	/* Copy across semaphore status as we need the same behaviour */
> +	rq->sched.flags |= signal->sched.flags;
>   	return 0;
>   }
>   
> @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
>   }
>   
>   static int
> -emit_semaphore_wait(struct i915_request *to,
> -		    struct i915_request *from,
> -		    gfp_t gfp)
> +__emit_semaphore_wait(struct i915_request *to,
> +		      struct i915_request *from,
> +		      u32 seqno)
>   {
>   	const int has_token = INTEL_GEN(to->i915) >= 12;
>   	u32 hwsp_offset;
> -	int len;
> +	int len, err;
>   	u32 *cs;
>   
>   	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
>   
> -	/* Just emit the first semaphore we see as request space is limited. */
> -	if (already_busywaiting(to) & from->engine->mask)
> -		goto await_fence;
> -
> -	if (i915_request_await_start(to, from) < 0)
> -		goto await_fence;
> -
> -	/* Only submit our spinner after the signaler is running! */
> -	if (__i915_request_await_execution(to, from, NULL, gfp))
> -		goto await_fence;
> -
>   	/* We need to pin the signaler's HWSP until we are finished reading. */
> -	if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
> -		goto await_fence;
> +	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
> +	if (err)
> +		return err;
>   
>   	len = 4;
>   	if (has_token)
> @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
>   		 MI_SEMAPHORE_POLL |
>   		 MI_SEMAPHORE_SAD_GTE_SDD) +
>   		has_token;
> -	*cs++ = from->fence.seqno;
> +	*cs++ = seqno;
>   	*cs++ = hwsp_offset;
>   	*cs++ = 0;
>   	if (has_token) {
> @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
>   	}
>   
>   	intel_ring_advance(to, cs);
> +	return 0;
> +}
> +
> +static int
> +emit_semaphore_wait(struct i915_request *to,
> +		    struct i915_request *from,
> +		    gfp_t gfp)
> +{
> +	/* Just emit the first semaphore we see as request space is limited. */
> +	if (already_busywaiting(to) & from->engine->mask)
> +		goto await_fence;
> +
> +	if (i915_request_await_start(to, from) < 0)
> +		goto await_fence;
> +
> +	/* Only submit our spinner after the signaler is running! */
> +	if (__await_execution(to, from, NULL, gfp))
> +		goto await_fence;
> +
> +	if (__emit_semaphore_wait(to, from, from->fence.seqno))
> +		goto await_fence;
> +
>   	to->sched.semaphores |= from->engine->mask;
>   	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
>   	return 0;
> @@ -995,6 +1009,57 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   	return 0;
>   }
>   
> +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> +					  struct dma_fence *fence)
> +{
> +	return __intel_timeline_sync_is_later(tl,
> +					      fence->context,
> +					      fence->seqno - 1);
> +}
> +
> +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> +					 const struct dma_fence *fence)
> +{
> +	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> +}
> +
> +static int
> +__i915_request_await_execution(struct i915_request *to,
> +			       struct i915_request *from,
> +			       void (*hook)(struct i915_request *rq,
> +					    struct dma_fence *signal))
> +{
> +	int err;
> +
> +	/* Submit both requests at the same time */
> +	err = __await_execution(to, from, hook, I915_FENCE_GFP);
> +	if (err)
> +		return err;
> +
> +	/* Squash repeated dependencies to the same timelines */
> +	if (intel_timeline_sync_has_start(i915_request_timeline(to),
> +					  &from->fence))
> +		return 0;
> +
> +	/* Ensure both start together [after all semaphores in signal] */
> +	if (intel_engine_has_semaphores(to->engine))
> +		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
> +	else
> +		err = i915_request_await_start(to, from);
> +	if (err < 0)
> +		return err;
> +
> +	/* Couple the dependency tree for PI on this exposed to->fence */
> +	if (to->engine->schedule) {
> +		err = i915_sched_node_add_dependency(&to->sched, &from->sched);
> +		if (err < 0)
> +			return err;
> +	}
> +
> +	return intel_timeline_sync_set_start(i915_request_timeline(to),
> +					     &from->fence);
> +}
> +
>   int
>   i915_request_await_execution(struct i915_request *rq,
>   			     struct dma_fence *fence,
> @@ -1030,8 +1095,7 @@ i915_request_await_execution(struct i915_request *rq,
>   		if (dma_fence_is_i915(fence))
>   			ret = __i915_request_await_execution(rq,
>   							     to_request(fence),
> -							     hook,
> -							     I915_FENCE_GFP);
> +							     hook);
>   		else
>   			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
>   							    I915_FENCE_TIMEOUT,
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 1937a26d412f..2bc2aa46a1b9 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -484,7 +484,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>   	 * so we may be called out-of-order.
>   	 */
>   	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
> -		GEM_BUG_ON(!node_signaled(dep->signaler));
>   		GEM_BUG_ON(!list_empty(&dep->dfs_link));
>   
>   		list_del(&dep->wait_link);
> 


Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Copy across scheduler behaviour flags across submit fences (rev5)
  2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
                   ` (4 preceding siblings ...)
  2019-12-10 15:13 ` [Intel-gfx] [PATCH v5] " Chris Wilson
@ 2019-12-11  1:54 ` Patchwork
  5 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2019-12-11  1:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Copy across scheduler behaviour flags across submit fences (rev5)
URL   : https://patchwork.freedesktop.org/series/70107/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_7534 -> Patchwork_15673
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_15673 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_15673, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_15673:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_exec_gttfill@basic:
    - fi-ivb-3770:        [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-ivb-3770/igt@gem_exec_gttfill@basic.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-ivb-3770/igt@gem_exec_gttfill@basic.html

  
Known issues
------------

  Here are the changes found in Patchwork_15673 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_blt:
    - fi-ivb-3770:        [PASS][3] -> [DMESG-FAIL][4] ([i915#725])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-ivb-3770/igt@i915_selftest@live_blt.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-ivb-3770/igt@i915_selftest@live_blt.html
    - fi-byt-j1900:       [PASS][5] -> [DMESG-FAIL][6] ([i915#725])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-byt-j1900/igt@i915_selftest@live_blt.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-byt-j1900/igt@i915_selftest@live_blt.html
    - fi-hsw-4770:        [PASS][7] -> [DMESG-FAIL][8] ([i915#725])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-hsw-4770/igt@i915_selftest@live_blt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-hsw-4770/igt@i915_selftest@live_blt.html
    - fi-bsw-nick:        [PASS][9] -> [DMESG-FAIL][10] ([i915#723])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-bsw-nick/igt@i915_selftest@live_blt.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-bsw-nick/igt@i915_selftest@live_blt.html
    - fi-byt-n2820:       [PASS][11] -> [DMESG-FAIL][12] ([i915#725])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-byt-n2820/igt@i915_selftest@live_blt.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-byt-n2820/igt@i915_selftest@live_blt.html

  
#### Possible fixes ####

  * igt@gem_ctx_create@basic-files:
    - {fi-tgl-u}:         [INCOMPLETE][13] ([fdo#111735]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-tgl-u/igt@gem_ctx_create@basic-files.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-tgl-u/igt@gem_ctx_create@basic-files.html

  
#### Warnings ####

  * igt@i915_selftest@live_gem_contexts:
    - fi-hsw-peppy:       [INCOMPLETE][15] ([i915#694]) -> [DMESG-FAIL][16] ([i915#722])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-hsw-peppy/igt@i915_selftest@live_gem_contexts.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-hsw-peppy/igt@i915_selftest@live_gem_contexts.html

  * igt@kms_busy@basic-flip-pipe-b:
    - fi-kbl-x1275:       [DMESG-WARN][17] ([i915#62] / [i915#92]) -> [DMESG-WARN][18] ([i915#62] / [i915#92] / [i915#95]) +4 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-kbl-x1275/igt@kms_busy@basic-flip-pipe-b.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-kbl-x1275/igt@kms_busy@basic-flip-pipe-b.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][19] ([fdo#111407]) -> [FAIL][20] ([fdo#111096] / [i915#323])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - fi-kbl-x1275:       [DMESG-WARN][21] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][22] ([i915#62] / [i915#92]) +5 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7534/fi-kbl-x1275/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/fi-kbl-x1275/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111407]: https://bugs.freedesktop.org/show_bug.cgi?id=111407
  [fdo#111735]: https://bugs.freedesktop.org/show_bug.cgi?id=111735
  [i915#323]: https://gitlab.freedesktop.org/drm/intel/issues/323
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#694]: https://gitlab.freedesktop.org/drm/intel/issues/694
  [i915#707]: https://gitlab.freedesktop.org/drm/intel/issues/707
  [i915#722]: https://gitlab.freedesktop.org/drm/intel/issues/722
  [i915#723]: https://gitlab.freedesktop.org/drm/intel/issues/723
  [i915#725]: https://gitlab.freedesktop.org/drm/intel/issues/725
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (53 -> 47)
------------------------------

  Additional (1): fi-hsw-4770r 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7534 -> Patchwork_15673

  CI-20190529: 20190529
  CI_DRM_7534: 66a6222c16abb82d6a4480b0a7ff8e375cc6a3a6 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5342: 3e43fb3fa97ce25819444d63848439acf6e397b5 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_15673: c3c041f703633a302cfc3e7efc7522e759d5a71f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

c3c041f70363 drm/i915: Copy across scheduler behaviour flags across submit fences

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15673/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-12-11  1:54 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-04 21:17 [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Chris Wilson
2019-12-04 22:24 ` Chris Wilson
2019-12-05  3:14 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Copy across scheduler behaviour flags across submit fences (rev4) Patchwork
2019-12-05  3:42 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2019-12-10 12:41 ` [Intel-gfx] [PATCH v3] drm/i915: Copy across scheduler behaviour flags across submit fences Tvrtko Ursulin
2019-12-10 13:06   ` Chris Wilson
2019-12-10 14:12     ` Tvrtko Ursulin
2019-12-10 14:29       ` Chris Wilson
2019-12-10 15:04         ` Tvrtko Ursulin
2019-12-10 15:13 ` [Intel-gfx] [PATCH v5] " Chris Wilson
2019-12-10 15:15   ` Tvrtko Ursulin
2019-12-11  1:54 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Copy across scheduler behaviour flags across submit fences (rev5) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.