* [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request
@ 2020-05-26  9:07 Chris Wilson
  2020-05-26  9:07   ` [Intel-gfx] " Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Chris Wilson @ 2020-05-26  9:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Reorder the code so that __i915_request_await_execution is defined ahead
of i915_request_await_request, allowing the next patch to reuse the
await_execution path from a special case inside await_request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 264 ++++++++++++++--------------
 1 file changed, 132 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c282719ad3ac..33bbad623e02 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
 					     I915_FENCE_GFP);
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
 static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
 {
-	int ret;
+	int err;
 
-	GEM_BUG_ON(to == from);
-	GEM_BUG_ON(to->timeline == from->timeline);
+	GEM_BUG_ON(intel_context_is_barrier(from->context));
 
-	if (i915_request_completed(from)) {
-		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	/* Squash repeated dependencies to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
 		return 0;
+
+	/*
+	 * Wait until the start of this request.
+	 *
+	 * The execution cb fires when we submit the request to HW. But in
+	 * many cases this may be long before the request itself is ready to
+	 * run (consider that we submit 2 requests for the same context, where
+	 * the request of interest is behind an indefinite spinner). So we hook
+	 * up to both to reduce our queues and keep the execution lag minimised
+	 * in the worst case, though we hope that the await_start is elided.
+	 */
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/*
+	 * Ensure both start together [after all semaphores in signal]
+	 *
+	 * Now that we are queued to the HW at roughly the same time (thanks
+	 * to the execute cb) and are ready to run at roughly the same time
+	 * (thanks to the await start), our signaler may still be indefinitely
+	 * delayed by waiting on a semaphore from a remote engine. If our
+	 * signaler depends on a semaphore, so indirectly do we, and we do not
+	 * want to start our payload until our signaler also starts theirs.
+	 * So we wait.
+	 *
+	 * However, there is also a second condition for which we need to wait
+	 * for the precise start of the signaler. Consider that the signaler
+	 * was submitted in a chain of requests following another context
+	 * (with just an ordinary intra-engine fence dependency between the
+	 * two). In this case the signaler is queued to HW, but not for
+	 * immediate execution, and so we must wait until it reaches the
+	 * active slot.
+	 */
+	if (intel_engine_has_semaphores(to->engine) &&
+	    !i915_request_has_initial_breadcrumb(to)) {
+		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+		if (err < 0)
+			return err;
 	}
 
+	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(&to->sched,
+		err = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
-						     I915_DEPENDENCY_EXTERNAL);
-		if (ret < 0)
-			return ret;
+						     I915_DEPENDENCY_WEAK);
+		if (err < 0)
+			return err;
 	}
 
-	if (to->engine == from->engine)
-		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-						       &from->submit,
-						       I915_FENCE_GFP);
-	else
-		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
 }
 
 static void mark_external(struct i915_request *rq)
@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
 }
 
 int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
 	int ret;
 
-	/*
-	 * Note that if the fence-array was created in signal-on-any mode,
-	 * we should *not* decompose it into its individual fences. However,
-	 * we don't currently store which mode the fence-array is operating
-	 * in. Fortunately, the only user of signal-on-any is private to
-	 * amdgpu and we should not see any incoming fence-array from
-	 * sync-file being in signal-on-any mode.
-	 */
 	if (dma_fence_is_array(fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(fence);
 
+		/* XXX Error for signal-on-any fence arrays */
+
 		child = array->fences;
 		nchild = array->num_fences;
 		GEM_BUG_ON(!nchild);
@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 			continue;
 		}
 
-		/*
-		 * Requests on the same timeline are explicitly ordered, along
-		 * with their dependencies, by i915_request_add() which ensures
-		 * that requests are submitted in-order through each ring.
-		 */
 		if (fence->context == rq->fence.context)
 			continue;
 
-		/* Squash repeated waits to the same timelines */
-		if (fence->context &&
-		    intel_timeline_sync_is_later(i915_request_timeline(rq),
-						 fence))
-			continue;
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
 
 		if (dma_fence_is_i915(fence))
-			ret = i915_request_await_request(rq, to_request(fence));
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook);
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
 			return ret;
-
-		/* Record the latest fence used against each timeline */
-		if (fence->context)
-			intel_timeline_sync_set(i915_request_timeline(rq),
-						fence);
 	} while (--nchild);
 
 	return 0;
 }
 
-static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
-					  struct dma_fence *fence)
-{
-	return __intel_timeline_sync_is_later(tl,
-					      fence->context,
-					      fence->seqno - 1);
-}
-
-static int intel_timeline_sync_set_start(struct intel_timeline *tl,
-					 const struct dma_fence *fence)
-{
-	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
-}
-
 static int
-__i915_request_await_execution(struct i915_request *to,
-			       struct i915_request *from,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal))
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
-	int err;
-
-	GEM_BUG_ON(intel_context_is_barrier(from->context));
+	int ret;
 
-	/* Submit both requests at the same time */
-	err = __await_execution(to, from, hook, I915_FENCE_GFP);
-	if (err)
-		return err;
+	GEM_BUG_ON(to == from);
+	GEM_BUG_ON(to->timeline == from->timeline);
 
-	/* Squash repeated dependencies to the same timelines */
-	if (intel_timeline_sync_has_start(i915_request_timeline(to),
-					  &from->fence))
+	if (i915_request_completed(from)) {
+		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
 		return 0;
-
-	/*
-	 * Wait until the start of this request.
-	 *
-	 * The execution cb fires when we submit the request to HW. But in
-	 * many cases this may be long before the request itself is ready to
-	 * run (consider that we submit 2 requests for the same context, where
-	 * the request of interest is behind an indefinite spinner). So we hook
-	 * up to both to reduce our queues and keep the execution lag minimised
-	 * in the worst case, though we hope that the await_start is elided.
-	 */
-	err = i915_request_await_start(to, from);
-	if (err < 0)
-		return err;
-
-	/*
-	 * Ensure both start together [after all semaphores in signal]
-	 *
-	 * Now that we are queued to the HW at roughly the same time (thanks
-	 * to the execute cb) and are ready to run at roughly the same time
-	 * (thanks to the await start), our signaler may still be indefinitely
-	 * delayed by waiting on a semaphore from a remote engine. If our
-	 * signaler depends on a semaphore, so indirectly do we, and we do not
-	 * want to start our payload until our signaler also starts theirs.
-	 * So we wait.
-	 *
-	 * However, there is also a second condition for which we need to wait
-	 * for the precise start of the signaler. Consider that the signaler
-	 * was submitted in a chain of requests following another context
-	 * (with just an ordinary intra-engine fence dependency between the
-	 * two). In this case the signaler is queued to HW, but not for
-	 * immediate execution, and so we must wait until it reaches the
-	 * active slot.
-	 */
-	if (intel_engine_has_semaphores(to->engine) &&
-	    !i915_request_has_initial_breadcrumb(to)) {
-		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
-		if (err < 0)
-			return err;
 	}
 
-	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		err = i915_sched_node_add_dependency(&to->sched,
+		ret = i915_sched_node_add_dependency(&to->sched,
 						     &from->sched,
-						     I915_DEPENDENCY_WEAK);
-		if (err < 0)
-			return err;
+						     I915_DEPENDENCY_EXTERNAL);
+		if (ret < 0)
+			return ret;
 	}
 
-	return intel_timeline_sync_set_start(i915_request_timeline(to),
-					     &from->fence);
+	if (to->engine == READ_ONCE(from->engine))
+		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+						       &from->submit,
+						       I915_FENCE_GFP);
+	else
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 int
-i915_request_await_execution(struct i915_request *rq,
-			     struct dma_fence *fence,
-			     void (*hook)(struct i915_request *rq,
-					  struct dma_fence *signal))
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 {
 	struct dma_fence **child = &fence;
 	unsigned int nchild = 1;
 	int ret;
 
+	/*
+	 * Note that if the fence-array was created in signal-on-any mode,
+	 * we should *not* decompose it into its individual fences. However,
+	 * we don't currently store which mode the fence-array is operating
+	 * in. Fortunately, the only user of signal-on-any is private to
+	 * amdgpu and we should not see any incoming fence-array from
+	 * sync-file being in signal-on-any mode.
+	 */
 	if (dma_fence_is_array(fence)) {
 		struct dma_fence_array *array = to_dma_fence_array(fence);
 
-		/* XXX Error for signal-on-any fence arrays */
-
 		child = array->fences;
 		nchild = array->num_fences;
 		GEM_BUG_ON(!nchild);
@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
 			continue;
 		}
 
+		/*
+		 * Requests on the same timeline are explicitly ordered, along
+		 * with their dependencies, by i915_request_add() which ensures
+		 * that requests are submitted in-order through each ring.
+		 */
 		if (fence->context == rq->fence.context)
 			continue;
 
-		/*
-		 * We don't squash repeated fence dependencies here as we
-		 * want to run our callback in all cases.
-		 */
+		/* Squash repeated waits to the same timelines */
+		if (fence->context &&
+		    intel_timeline_sync_is_later(i915_request_timeline(rq),
+						 fence))
+			continue;
 
 		if (dma_fence_is_i915(fence))
-			ret = __i915_request_await_execution(rq,
-							     to_request(fence),
-							     hook);
+			ret = i915_request_await_request(rq, to_request(fence));
 		else
 			ret = i915_request_await_external(rq, fence);
 		if (ret < 0)
 			return ret;
+
+		/* Record the latest fence used against each timeline */
+		if (fence->context)
+			intel_timeline_sync_set(i915_request_timeline(rq),
+						fence);
 	} while (--nchild);
 
 	return 0;
-- 
2.20.1
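
The seqno - 1 trick in the moved helpers records an await on the *start*
of a request as the point just before its own breadcrumb. A toy sketch of
that bookkeeping, assuming a flat per-context array in place of the
kernel's timeline sync tree and ignoring seqno wraparound:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the timeline's sync map: remembers, per context id, the
 * highest seqno this timeline has already awaited. */
static unsigned long long latest[4];

static bool sync_is_later(unsigned int ctx, unsigned long long seqno)
{
	return latest[ctx] >= seqno;
}

static void sync_set(unsigned int ctx, unsigned long long seqno)
{
	if (seqno > latest[ctx])
		latest[ctx] = seqno;
}

int main(void)
{
	unsigned int ctx = 1;
	unsigned long long seqno = 10; /* the signaler's own breadcrumb */

	/* intel_timeline_sync_set_start(): record seqno - 1 ... */
	sync_set(ctx, seqno - 1);

	/* ...so a repeated await on the same start is squashed, */
	printf("%d\n", sync_is_later(ctx, seqno - 1)); /* 1 */
	/* but an await on the request's completion is not. */
	printf("%d\n", sync_is_later(ctx, seqno));     /* 0 */
	return 0;
}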


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-26  9:07 [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request Chris Wilson
@ 2020-05-26  9:07   ` Chris Wilson
  2020-05-26 10:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Reorder await_execution before await_request Patchwork
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2020-05-26  9:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, stable

When we push a virtual request onto the HW, we update rq->engine to
point to the physical engine. If the user then submits a request that
waits upon the virtual engine, but along the physical engine in use, it
will see that it is due to be submitted to the same engine and take a
shortcut (and be queued without waiting for the completion fence).
However, the virtual request may be preempted (either by higher
priority users, or by timeslicing) and removed from the physical engine
to be migrated over to one of its siblings. The dependent normal
request, however, is oblivious to the removal of the virtual request
and remains queued to execute on HW, believing that once it reaches the
head of its queue all of its predecessors will have completed
executing!
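
A minimal userspace sketch of the mask test the fix relies on: a request
confined to one physical engine has exactly one bit set in its
execution_mask, a virtual request has several, so the OR of two masks
stays a power of two only when both sides are the same single engine.
The engine bits below are hypothetical; is_power_of_2() mirrors the
kernel helper's behaviour.

#include <stdbool.h>
#include <stdio.h>

/* Exactly one bit set, as per the kernel's is_power_of_2(). */
static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	unsigned long rcs0 = 1ul << 0;	/* pinned to one engine */
	unsigned long bcs0 = 1ul << 1;	/* a different engine */
	unsigned long vcs_ve = (1ul << 2) | (1ul << 3); /* virtual: vcs0|vcs1 */

	printf("%d\n", is_power_of_2(rcs0 | rcs0));   /* 1: shortcut is safe */
	printf("%d\n", is_power_of_2(rcs0 | bcs0));   /* 0: semaphore wait */
	printf("%d\n", is_power_of_2(rcs0 | vcs_ve)); /* 0: semaphore wait */
	return 0;
}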

v2: Beware restriction of signal->execution_mask prior to submission.

Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
Testcase: igt/gem_exec_balancer/sliced
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.3+
---
 drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 33bbad623e02..0b07ccc7e9bc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
+static int
+await_request_submit(struct i915_request *to, struct i915_request *from)
+{
+	/*
+	 * If we are waiting on a virtual engine, then it may be
+	 * constrained to execute on a single engine *prior* to submission.
+	 * When it is submitted, it will be first submitted to the virtual
+	 * engine and then passed to the physical engine. We cannot allow
+	 * the waiter to be submitted immediately to the physical engine
+	 * as it may then bypass the virtual request.
+	 */
+	if (to->engine == READ_ONCE(from->engine))
+		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
+							&from->submit,
+							I915_FENCE_GFP);
+	else
+		return __i915_request_await_execution(to, from, NULL);
+}
+
 static int
 i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
@@ -1258,10 +1277,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 			return ret;
 	}
 
-	if (to->engine == READ_ONCE(from->engine))
-		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-						       &from->submit,
-						       I915_FENCE_GFP);
+	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+		ret = await_request_submit(to, from);
 	else
 		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
 	if (ret < 0)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Reorder await_execution before await_request
  2020-05-26  9:07 [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request Chris Wilson
  2020-05-26  9:07   ` [Intel-gfx] " Chris Wilson
@ 2020-05-26 10:19 ` Patchwork
  2020-05-26 12:55 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  2020-05-27  7:39 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
  3 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2020-05-26 10:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Reorder await_execution before await_request
URL   : https://patchwork.freedesktop.org/series/77653/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8537 -> Patchwork_17775
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/index.html

Known issues
------------

  Here are the changes found in Patchwork_17775 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_chamelium@hdmi-crc-fast:
    - fi-kbl-7500u:       [PASS][1] -> [DMESG-FAIL][2] ([i915#165])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/fi-kbl-7500u/igt@kms_chamelium@hdmi-crc-fast.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/fi-kbl-7500u/igt@kms_chamelium@hdmi-crc-fast.html

  
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165


Participating hosts (50 -> 45)
------------------------------

  Additional (1): fi-kbl-7560u 
  Missing    (6): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_8537 -> Patchwork_17775

  CI-20190529: 20190529
  CI_DRM_8537: 05a47cd761c49c3fa87648c30df0fd4d64b0b8c5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5678: c1f30ee09ac2e7eb3e8e90245239731a169a6050 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17775: 95d84329d1e9e74b00a4372c6b5b24a4d5a58d0c @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

95d84329d1e9 drm/i915/gt: Do not schedule normal requests immediately along virtual
acc962187d69 drm/i915: Reorder await_execution before await_request

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/index.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/2] drm/i915: Reorder await_execution before await_request
  2020-05-26  9:07 [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request Chris Wilson
  2020-05-26  9:07   ` [Intel-gfx] " Chris Wilson
  2020-05-26 10:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Reorder await_execution before await_request Patchwork
@ 2020-05-26 12:55 ` Patchwork
  2020-05-27  7:39 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
  3 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2020-05-26 12:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Reorder await_execution before await_request
URL   : https://patchwork.freedesktop.org/series/77653/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8537_full -> Patchwork_17775_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_17775_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [PASS][1] -> [DMESG-WARN][2] ([i915#1436] / [i915#716])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-glk7/igt@gen9_exec_parse@allowed-all.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-glk6/igt@gen9_exec_parse@allowed-all.html

  * igt@kms_big_fb@linear-64bpp-rotate-0:
    - shard-glk:          [PASS][3] -> [FAIL][4] ([i915#1119] / [i915#118] / [i915#95])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-glk9/igt@kms_big_fb@linear-64bpp-rotate-0.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-glk8/igt@kms_big_fb@linear-64bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-c-cursor-128x42-offscreen:
    - shard-skl:          [PASS][5] -> [FAIL][6] ([i915#54])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-skl1/igt@kms_cursor_crc@pipe-c-cursor-128x42-offscreen.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-skl2/igt@kms_cursor_crc@pipe-c-cursor-128x42-offscreen.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-skl:          [PASS][7] -> [FAIL][8] ([i915#1188])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-skl8/igt@kms_hdr@bpc-switch-suspend.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-skl8/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_psr@psr2_suspend:
    - shard-iclb:         [PASS][9] -> [SKIP][10] ([fdo#109441]) +3 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-iclb2/igt@kms_psr@psr2_suspend.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-iclb4/igt@kms_psr@psr2_suspend.html

  * igt@kms_psr@suspend:
    - shard-skl:          [PASS][11] -> [INCOMPLETE][12] ([i915#198])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-skl7/igt@kms_psr@suspend.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-skl3/igt@kms_psr@suspend.html

  * igt@kms_setmode@basic:
    - shard-apl:          [PASS][13] -> [FAIL][14] ([i915#31])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-apl3/igt@kms_setmode@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-apl6/igt@kms_setmode@basic.html

  
#### Possible fixes ####

  * igt@i915_suspend@forcewake:
    - shard-apl:          [DMESG-WARN][15] ([i915#180]) -> [PASS][16] +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-apl6/igt@i915_suspend@forcewake.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-apl1/igt@i915_suspend@forcewake.html

  * igt@kms_big_fb@y-tiled-64bpp-rotate-0:
    - shard-glk:          [FAIL][17] ([i915#1119] / [i915#118] / [i915#95]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-glk8/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-glk2/igt@kms_big_fb@y-tiled-64bpp-rotate-0.html

  * {igt@kms_flip@flip-vs-suspend@c-dp1}:
    - shard-kbl:          [DMESG-WARN][19] ([i915#180]) -> [PASS][20] +4 similar issues
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-kbl1/igt@kms_flip@flip-vs-suspend@c-dp1.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-kbl6/igt@kms_flip@flip-vs-suspend@c-dp1.html

  * {igt@kms_flip@plain-flip-fb-recreate-interruptible@c-dp1}:
    - shard-kbl:          [FAIL][21] ([i915#1928]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-kbl3/igt@kms_flip@plain-flip-fb-recreate-interruptible@c-dp1.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-kbl3/igt@kms_flip@plain-flip-fb-recreate-interruptible@c-dp1.html

  * {igt@kms_flip@plain-flip-fb-recreate-interruptible@c-edp1}:
    - shard-skl:          [FAIL][23] ([i915#1928]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-skl5/igt@kms_flip@plain-flip-fb-recreate-interruptible@c-edp1.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-skl2/igt@kms_flip@plain-flip-fb-recreate-interruptible@c-edp1.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min:
    - shard-skl:          [FAIL][25] ([fdo#108145] / [i915#265]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-skl10/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-skl6/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html

  * igt@kms_psr@psr2_sprite_mmap_gtt:
    - shard-iclb:         [SKIP][27] ([fdo#109441]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-iclb7/igt@kms_psr@psr2_sprite_mmap_gtt.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-iclb2/igt@kms_psr@psr2_sprite_mmap_gtt.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [FAIL][29] ([i915#31]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-kbl4/igt@kms_setmode@basic.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-kbl7/igt@kms_setmode@basic.html

  * {igt@perf@polling-parameterized}:
    - shard-iclb:         [FAIL][31] ([i915#1542]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-iclb7/igt@perf@polling-parameterized.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-iclb2/igt@perf@polling-parameterized.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][33] ([i915#588]) -> [SKIP][34] ([i915#658])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-iclb4/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][35] ([i915#454]) -> [SKIP][36] ([i915#468])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-tglb5/igt@i915_pm_dc@dc6-psr.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-tglb2/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_cursor_legacy@cursorb-vs-flipa-toggle:
    - shard-glk:          [DMESG-FAIL][37] ([i915#1925]) -> [DMESG-WARN][38] ([i915#1926])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8537/shard-glk7/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/shard-glk7/igt@kms_cursor_legacy@cursorb-vs-flipa-toggle.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#1119]: https://gitlab.freedesktop.org/drm/intel/issues/1119
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1925]: https://gitlab.freedesktop.org/drm/intel/issues/1925
  [i915#1926]: https://gitlab.freedesktop.org/drm/intel/issues/1926
  [i915#1928]: https://gitlab.freedesktop.org/drm/intel/issues/1928
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_8537 -> Patchwork_17775

  CI-20190529: 20190529
  CI_DRM_8537: 05a47cd761c49c3fa87648c30df0fd4d64b0b8c5 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5678: c1f30ee09ac2e7eb3e8e90245239731a169a6050 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17775: 95d84329d1e9e74b00a4372c6b5b24a4d5a58d0c @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17775/index.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-26  9:07   ` [Intel-gfx] " Chris Wilson
@ 2020-05-26 16:00     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2020-05-26 16:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 26/05/2020 10:07, Chris Wilson wrote:
> When we push a virtual request onto the HW, we update rq->engine to
> point to the physical engine. If the user then submits a request that
> waits upon the virtual engine, but along the physical engine in use, it
> will see that it is due to be submitted to the same engine and take a
> shortcut (and be queued without waiting for the completion fence).
> However, the virtual request may be preempted (either by higher
> priority users, or by timeslicing) and removed from the physical engine
> to be migrated over to one of its siblings. The dependent normal
> request, however, is oblivious to the removal of the virtual request
> and remains queued to execute on HW, believing that once it reaches the
> head of its queue all of its predecessors will have completed
> executing!
> 
> v2: Beware restriction of signal->execution_mask prior to submission.
> 
> Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
> Testcase: igt/gem_exec_balancer/sliced
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v5.3+
> ---
>   drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
>   1 file changed, 21 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 33bbad623e02..0b07ccc7e9bc 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
>   	return 0;
>   }
>   
> +static int
> +await_request_submit(struct i915_request *to, struct i915_request *from)
> +{
> +	/*
> +	 * If we are waiting on a virtual engine, then it may be
> +	 * constrained to execute on a single engine *prior* to submission.
> +	 * When it is submitted, it will be first submitted to the virtual
> +	 * engine and then passed to the physical engine. We cannot allow
> +	 * the waiter to be submitted immediately to the physical engine
> +	 * as it may then bypass the virtual request.
> +	 */
> +	if (to->engine == READ_ONCE(from->engine))
> +		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
> +							&from->submit,
> +							I915_FENCE_GFP);
> +	else

When can the engines be different given that the mask test below brought us here?

Regards,

Tvrtko

> +		return __i915_request_await_execution(to, from, NULL);
> +}
> +
>   static int
>   i915_request_await_request(struct i915_request *to, struct i915_request *from)
>   {
> @@ -1258,10 +1277,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
>   			return ret;
>   	}
>   
> -	if (to->engine == READ_ONCE(from->engine))
> -		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
> -						       &from->submit,
> -						       I915_FENCE_GFP);
> +	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
> +		ret = await_request_submit(to, from);
>   	else
>   		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
>   	if (ret < 0)
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-26 16:00     ` Tvrtko Ursulin
@ 2020-05-26 16:09       ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2020-05-26 16:09 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: stable

Quoting Tvrtko Ursulin (2020-05-26 17:00:06)
> 
> On 26/05/2020 10:07, Chris Wilson wrote:
> > When we push a virtual request onto the HW, we update rq->engine to
> > point to the physical engine. If the user then submits a request that
> > waits upon the virtual engine, but along the physical engine in use, it
> > will see that it is due to be submitted to the same engine and take a
> > shortcut (and be queued without waiting for the completion fence).
> > However, the virtual request may be preempted (either by higher
> > priority users, or by timeslicing) and removed from the physical engine
> > to be migrated over to one of its siblings. The dependent normal
> > request, however, is oblivious to the removal of the virtual request
> > and remains queued to execute on HW, believing that once it reaches the
> > head of its queue all of its predecessors will have completed
> > executing!
> > 
> > v2: Beware restriction of signal->execution_mask prior to submission.
> > 
> > Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
> > Testcase: igt/gem_exec_balancer/sliced
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: <stable@vger.kernel.org> # v5.3+
> > ---
> >   drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
> >   1 file changed, 21 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 33bbad623e02..0b07ccc7e9bc 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
> >       return 0;
> >   }
> >   
> > +static int
> > +await_request_submit(struct i915_request *to, struct i915_request *from)
> > +{
> > +     /*
> > +      * If we are waiting on a virtual engine, then it may be
> > +      * constrained to execute on a single engine *prior* to submission.
> > +      * When it is submitted, it will be first submitted to the virtual
> > +      * engine and then passed to the physical engine. We cannot allow
> > +      * the waiter to be submitted immediately to the physical engine
> > +      * as it may then bypass the virtual request.
> > +      */
> > +     if (to->engine == READ_ONCE(from->engine))
> > +             return i915_sw_fence_await_sw_fence_gfp(&to->submit,
> > +                                                     &from->submit,
> > +                                                     I915_FENCE_GFP);
> > +     else
> 
> When can the engines be different given that the mask test below brought us here?

We change the mask during evaluation of the bond, which is from the
signaler's signaler's execute_cb before the signaler is submitted. So
there will be a period where the from->execution_mask is constrained to
a single engine, but it is still waiting to be queued. Once it is
executed on HW, it will remain on that engine.
-Chris
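
A standalone sketch of the window described above, mirroring the branch
structure of await_request_submit(); the struct layouts here are
illustrative only, not the driver's:

#include <stdbool.h>
#include <stdio.h>

struct engine  { const char *name; };
struct request { struct engine *engine; unsigned long execution_mask; };

static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

/* Mirrors the decision made by the patch (sketch, not kernel code). */
static const char *pick_wait(const struct request *to,
			     const struct request *from)
{
	if (!is_power_of_2(to->execution_mask | from->execution_mask))
		return "emit_semaphore_wait";
	if (to->engine == from->engine)
		return "i915_sw_fence_await_sw_fence_gfp";
	return "__i915_request_await_execution";
}

int main(void)
{
	struct engine rcs0 = { "rcs0" }, ve = { "virtual" };

	/* The bond has narrowed the signaler's mask to rcs0's bit, but
	 * it is still queued on the virtual engine: masks agree, engine
	 * pointers do not, so the waiter must await execution rather
	 * than mere submission. */
	struct request from = { &ve,   1ul << 0 };
	struct request to   = { &rcs0, 1ul << 0 };

	printf("%s waits via %s\n", to.engine->name, pick_wait(&to, &from));
	return 0;
}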

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-26  9:07   ` [Intel-gfx] " Chris Wilson
@ 2020-05-27  6:51     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2020-05-27  6:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 26/05/2020 10:07, Chris Wilson wrote:
> When we push a virtual request onto the HW, we update rq->engine to
> point to the physical engine. If the user then submits a request that
> waits upon the virtual engine, but along the physical engine in use, it
> will see that it is due to be submitted to the same engine and take a
> shortcut (and be queued without waiting for the completion fence).
> However, the virtual request may be preempted (either by higher
> priority users, or by timeslicing) and removed from the physical engine
> to be migrated over to one of its siblings. The dependent normal
> request, however, is oblivious to the removal of the virtual request
> and remains queued to execute on HW, believing that once it reaches the
> head of its queue all of its predecessors will have completed
> executing!
> 
> v2: Beware restriction of signal->execution_mask prior to submission.
> 
> Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
> Testcase: igt/gem_exec_balancer/sliced
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v5.3+
> ---
>   drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
>   1 file changed, 21 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 33bbad623e02..0b07ccc7e9bc 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
>   	return 0;
>   }
>   
> +static int
> +await_request_submit(struct i915_request *to, struct i915_request *from)
> +{
> +	/*
> +	 * If we are waiting on a virtual engine, then it may be
> +	 * constrained to execute on a single engine *prior* to submission.
> +	 * When it is submitted, it will be first submitted to the virtual
> +	 * engine and then passed to the physical engine. We cannot allow
> +	 * the waiter to be submitted immediately to the physical engine
> +	 * as it may then bypass the virtual request.
> +	 */
> +	if (to->engine == READ_ONCE(from->engine))
> +		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
> +							&from->submit,
> +							I915_FENCE_GFP);
> +	else
> +		return __i915_request_await_execution(to, from, NULL);

If I am following correctly, this branch will be the virtual <-> physical
or virtual <-> virtual dependency on the same physical engine. Why is
await_execution sufficient in this case? Is there something preventing
timeslicing between the two (not wanted, right!) once from starts
execution (rather than finishes)?

Regards,

Tvrtko

> +}
> +
>   static int
>   i915_request_await_request(struct i915_request *to, struct i915_request *from)
>   {
> @@ -1258,10 +1277,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
>   			return ret;
>   	}
>   
> -	if (to->engine == READ_ONCE(from->engine))
> -		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
> -						       &from->submit,
> -						       I915_FENCE_GFP);
> +	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
> +		ret = await_request_submit(to, from);
>   	else
>   		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
>   	if (ret < 0)
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-27  6:51     ` Tvrtko Ursulin
@ 2020-05-27  7:03       ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2020-05-27  7:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: stable

Quoting Tvrtko Ursulin (2020-05-27 07:51:44)
> 
> On 26/05/2020 10:07, Chris Wilson wrote:
> > When we push a virtual request onto the HW, we update rq->engine to
> > point to the physical engine. If the user then submits a request that
> > waits upon the virtual engine, but along the physical engine in use, it
> > will see that it is due to be submitted to the same engine and take a
> > shortcut (and be queued without waiting for the completion fence).
> > However, the virtual request may be preempted (either by higher
> > priority users, or by timeslicing) and removed from the physical engine
> > to be migrated over to one of its siblings. The dependent normal
> > request, however, is oblivious to the removal of the virtual request
> > and remains queued to execute on HW, believing that once it reaches the
> > head of its queue all of its predecessors will have completed
> > executing!
> > 
> > v2: Beware restriction of signal->execution_mask prior to submission.
> > 
> > Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
> > Testcase: igt/gem_exec_balancer/sliced
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: <stable@vger.kernel.org> # v5.3+
> > ---
> >   drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
> >   1 file changed, 21 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 33bbad623e02..0b07ccc7e9bc 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
> >       return 0;
> >   }
> >   
> > +static int
> > +await_request_submit(struct i915_request *to, struct i915_request *from)
> > +{
> > +     /*
> > +      * If we are waiting on a virtual engine, then it may be
> > +      * constrained to execute on a single engine *prior* to submission.
> > +      * When it is submitted, it will be first submitted to the virtual
> > +      * engine and then passed to the physical engine. We cannot allow
> > +      * the waiter to be submitted immediately to the physical engine
> > +      * as it may then bypass the virtual request.
> > +      */
> > +     if (to->engine == READ_ONCE(from->engine))
> > +             return i915_sw_fence_await_sw_fence_gfp(&to->submit,
> > +                                                     &from->submit,
> > +                                                     I915_FENCE_GFP);
> > +     else
> > +             return __i915_request_await_execution(to, from, NULL);
> 
> If I am following correctly, this branch will be the virtual <-> physical
> or virtual <-> virtual dependency on the same physical engine. Why is
> await_execution sufficient in this case? Is there something preventing
> timeslicing between the two (which we do not want, right?) once 'from'
> starts execution (as opposed to finishing)?

Timeslicing is only between independent requests. When we expire a
request, we also expire all of its waiters along the same engine, so
that the execution order remains intact.

await_execution is a more restrictive form of the
await_sw_fence(submit), in that 'to' can only be submitted once 'from'
reaches HW and not simply once 'from' reaches its engine submission
queue.
-Chris
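To make the submit/execute distinction concrete, here is a toy model of
the three stages a request passes through (illustrative only, none of
these names exist in the driver):

#include <stdbool.h>
#include <stdio.h>

/* queued -> submitted to an engine queue -> executing on HW */
enum stage { QUEUED, SUBMITTED, EXECUTING };

/* submit-fence ordering releases the waiter here ... */
static bool submit_fence_ready(enum stage from)
{
	return from >= SUBMITTED;
}

/* ... await_execution releases it only here */
static bool await_execution_ready(enum stage from)
{
	return from >= EXECUTING;
}

int main(void)
{
	/*
	 * A virtual request can be submitted and then migrated away
	 * before it ever executes on this engine, so only the stricter
	 * check keeps a dependent request correctly blocked.
	 */
	enum stage from = SUBMITTED;

	printf("submit fence ready: %d\n", submit_fence_ready(from));
	printf("await_execution ready: %d\n", await_execution_ready(from));
	return 0;
}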

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-27  7:03       ` Chris Wilson
@ 2020-05-27  7:32         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2020-05-27  7:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 27/05/2020 08:03, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-05-27 07:51:44)
>>
>> On 26/05/2020 10:07, Chris Wilson wrote:
>>> When we push a virtual request onto the HW, we update the rq->engine to
>>> point to the physical engine. A request that is then submitted by the
>>> user that waits upon the virtual engine, but along the physical engine
>>> in use, will then see that it is due to be submitted to the same engine
>>> and take a shortcut (and be queued without waiting for the completion
>>> fence). However, the virtual request may be preempted (either by higher
>>> priority users, or by timeslicing) and removed from the physical engine
>>> to be migrated over to one of its siblings. The dependent normal request
>>> however is oblivious to the removal of the virtual request and remains
>>> queued to execute on HW, believing that once it reaches the head of its
>>> queue all of its predecessors will have completed executing!
>>>
>>> v2: Beware restriction of signal->execution_mask prior to submission.
>>>
>>> Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
>>> Testcase: igt/gem_exec_balancer/sliced
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: <stable@vger.kernel.org> # v5.3+
>>> ---
>>>    drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
>>>    1 file changed, 21 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>> index 33bbad623e02..0b07ccc7e9bc 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>> @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
>>>        return 0;
>>>    }
>>>    
>>> +static int
>>> +await_request_submit(struct i915_request *to, struct i915_request *from)
>>> +{
>>> +     /*
>>> +      * If we are waiting on a virtual engine, then it may be
>>> +      * constrained to execute on a single engine *prior* to submission.
>>> +      * When it is submitted, it will be first submitted to the virtual
>>> +      * engine and then passed to the physical engine. We cannot allow
>>> +      * the waiter to be submitted immediately to the physical engine
>>> +      * as it may then bypass the virtual request.
>>> +      */
>>> +     if (to->engine == READ_ONCE(from->engine))
>>> +             return i915_sw_fence_await_sw_fence_gfp(&to->submit,
>>> +                                                     &from->submit,
>>> +                                                     I915_FENCE_GFP);
>>> +     else
>>> +             return __i915_request_await_execution(to, from, NULL);
>>
>> If I am following correctly, this branch will be the virtual <-> physical
>> or virtual <-> virtual dependency on the same physical engine. Why is
>> await_execution sufficient in this case? Is there something preventing
>> timeslicing between the two (which we do not want, right?) once 'from'
>> starts execution (as opposed to finishing)?
> 
> Timeslicing is only between independent requests. When we expire a
> request, we also expire all of its waiters along the same engine, so
> that the execution order remains intact.

Via scheduler dependencies - are they enough?

Regards,

Tvrtko

> 
> await_execution is a more restrictive form of the
> await_sw_fence(submit), in that 'to' can only be submitted once 'from'
> reaches HW and not simply once 'from' reaches its engine submission
> queue.




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request
  2020-05-26  9:07 [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request Chris Wilson
                   ` (2 preceding siblings ...)
  2020-05-26 12:55 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
@ 2020-05-27  7:39 ` Tvrtko Ursulin
  3 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2020-05-27  7:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 26/05/2020 10:07, Chris Wilson wrote:
> Reorder the code so that we can reuse the await_execution from a special
> case in await_request in the next patch.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 264 ++++++++++++++--------------
>   1 file changed, 132 insertions(+), 132 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index c282719ad3ac..33bbad623e02 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
>   					     I915_FENCE_GFP);
>   }
>   
> +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> +					  struct dma_fence *fence)
> +{
> +	return __intel_timeline_sync_is_later(tl,
> +					      fence->context,
> +					      fence->seqno - 1);
> +}
> +
> +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> +					 const struct dma_fence *fence)
> +{
> +	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> +}
> +
>   static int
> -i915_request_await_request(struct i915_request *to, struct i915_request *from)
> +__i915_request_await_execution(struct i915_request *to,
> +			       struct i915_request *from,
> +			       void (*hook)(struct i915_request *rq,
> +					    struct dma_fence *signal))
>   {
> -	int ret;
> +	int err;
>   
> -	GEM_BUG_ON(to == from);
> -	GEM_BUG_ON(to->timeline == from->timeline);
> +	GEM_BUG_ON(intel_context_is_barrier(from->context));
>   
> -	if (i915_request_completed(from)) {
> -		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
> +	/* Submit both requests at the same time */
> +	err = __await_execution(to, from, hook, I915_FENCE_GFP);
> +	if (err)
> +		return err;
> +
> > +	/* Squash repeated dependencies to the same timelines */
> +	if (intel_timeline_sync_has_start(i915_request_timeline(to),
> +					  &from->fence))
>   		return 0;
> +
> +	/*
> +	 * Wait until the start of this request.
> +	 *
> +	 * The execution cb fires when we submit the request to HW. But in
> +	 * many cases this may be long before the request itself is ready to
> +	 * run (consider that we submit 2 requests for the same context, where
> +	 * the request of interest is behind an indefinite spinner). So we hook
> +	 * up to both to reduce our queues and keep the execution lag minimised
> +	 * in the worst case, though we hope that the await_start is elided.
> +	 */
> +	err = i915_request_await_start(to, from);
> +	if (err < 0)
> +		return err;
> +
> +	/*
> +	 * Ensure both start together [after all semaphores in signal]
> +	 *
> +	 * Now that we are queued to the HW at roughly the same time (thanks
> +	 * to the execute cb) and are ready to run at roughly the same time
> +	 * (thanks to the await start), our signaler may still be indefinitely
> +	 * delayed by waiting on a semaphore from a remote engine. If our
> +	 * signaler depends on a semaphore, so indirectly do we, and we do not
> +	 * want to start our payload until our signaler also starts theirs.
> +	 * So we wait.
> +	 *
> +	 * However, there is also a second condition for which we need to wait
> +	 * for the precise start of the signaler. Consider that the signaler
> +	 * was submitted in a chain of requests following another context
> +	 * (with just an ordinary intra-engine fence dependency between the
> +	 * two). In this case the signaler is queued to HW, but not for
> +	 * immediate execution, and so we must wait until it reaches the
> +	 * active slot.
> +	 */
> +	if (intel_engine_has_semaphores(to->engine) &&
> +	    !i915_request_has_initial_breadcrumb(to)) {
> +		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
> +		if (err < 0)
> +			return err;
>   	}
>   
> +	/* Couple the dependency tree for PI on this exposed to->fence */
>   	if (to->engine->schedule) {
> -		ret = i915_sched_node_add_dependency(&to->sched,
> +		err = i915_sched_node_add_dependency(&to->sched,
>   						     &from->sched,
> -						     I915_DEPENDENCY_EXTERNAL);
> -		if (ret < 0)
> -			return ret;
> +						     I915_DEPENDENCY_WEAK);
> +		if (err < 0)
> +			return err;
>   	}
>   
> -	if (to->engine == from->engine)
> -		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
> -						       &from->submit,
> -						       I915_FENCE_GFP);
> -	else
> -		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
> -	if (ret < 0)
> -		return ret;
> -
> -	return 0;
> +	return intel_timeline_sync_set_start(i915_request_timeline(to),
> +					     &from->fence);
>   }
>   
>   static void mark_external(struct i915_request *rq)
> @@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
>   }
>   
>   int
> -i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
> +i915_request_await_execution(struct i915_request *rq,
> +			     struct dma_fence *fence,
> +			     void (*hook)(struct i915_request *rq,
> +					  struct dma_fence *signal))
>   {
>   	struct dma_fence **child = &fence;
>   	unsigned int nchild = 1;
>   	int ret;
>   
> -	/*
> -	 * Note that if the fence-array was created in signal-on-any mode,
> -	 * we should *not* decompose it into its individual fences. However,
> -	 * we don't currently store which mode the fence-array is operating
> -	 * in. Fortunately, the only user of signal-on-any is private to
> -	 * amdgpu and we should not see any incoming fence-array from
> -	 * sync-file being in signal-on-any mode.
> -	 */
>   	if (dma_fence_is_array(fence)) {
>   		struct dma_fence_array *array = to_dma_fence_array(fence);
>   
> +		/* XXX Error for signal-on-any fence arrays */
> +
>   		child = array->fences;
>   		nchild = array->num_fences;
>   		GEM_BUG_ON(!nchild);
> @@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   			continue;
>   		}
>   
> -		/*
> -		 * Requests on the same timeline are explicitly ordered, along
> -		 * with their dependencies, by i915_request_add() which ensures
> -		 * that requests are submitted in-order through each ring.
> -		 */
>   		if (fence->context == rq->fence.context)
>   			continue;
>   
> -		/* Squash repeated waits to the same timelines */
> -		if (fence->context &&
> -		    intel_timeline_sync_is_later(i915_request_timeline(rq),
> -						 fence))
> -			continue;
> +		/*
> +		 * We don't squash repeated fence dependencies here as we
> +		 * want to run our callback in all cases.
> +		 */
>   
>   		if (dma_fence_is_i915(fence))
> -			ret = i915_request_await_request(rq, to_request(fence));
> +			ret = __i915_request_await_execution(rq,
> +							     to_request(fence),
> +							     hook);
>   		else
>   			ret = i915_request_await_external(rq, fence);
>   		if (ret < 0)
>   			return ret;
> -
> -		/* Record the latest fence used against each timeline */
> -		if (fence->context)
> -			intel_timeline_sync_set(i915_request_timeline(rq),
> -						fence);
>   	} while (--nchild);
>   
>   	return 0;
>   }
>   
> -static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> -					  struct dma_fence *fence)
> -{
> -	return __intel_timeline_sync_is_later(tl,
> -					      fence->context,
> -					      fence->seqno - 1);
> -}
> -
> -static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> -					 const struct dma_fence *fence)
> -{
> -	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> -}
> -
>   static int
> -__i915_request_await_execution(struct i915_request *to,
> -			       struct i915_request *from,
> -			       void (*hook)(struct i915_request *rq,
> -					    struct dma_fence *signal))
> +i915_request_await_request(struct i915_request *to, struct i915_request *from)
>   {
> -	int err;
> -
> -	GEM_BUG_ON(intel_context_is_barrier(from->context));
> +	int ret;
>   
> -	/* Submit both requests at the same time */
> -	err = __await_execution(to, from, hook, I915_FENCE_GFP);
> -	if (err)
> -		return err;
> +	GEM_BUG_ON(to == from);
> +	GEM_BUG_ON(to->timeline == from->timeline);
>   
> > -	/* Squash repeated dependencies to the same timelines */
> -	if (intel_timeline_sync_has_start(i915_request_timeline(to),
> -					  &from->fence))
> +	if (i915_request_completed(from)) {
> +		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
>   		return 0;
> -
> -	/*
> -	 * Wait until the start of this request.
> -	 *
> -	 * The execution cb fires when we submit the request to HW. But in
> -	 * many cases this may be long before the request itself is ready to
> -	 * run (consider that we submit 2 requests for the same context, where
> -	 * the request of interest is behind an indefinite spinner). So we hook
> -	 * up to both to reduce our queues and keep the execution lag minimised
> -	 * in the worst case, though we hope that the await_start is elided.
> -	 */
> -	err = i915_request_await_start(to, from);
> -	if (err < 0)
> -		return err;
> -
> -	/*
> -	 * Ensure both start together [after all semaphores in signal]
> -	 *
> -	 * Now that we are queued to the HW at roughly the same time (thanks
> -	 * to the execute cb) and are ready to run at roughly the same time
> -	 * (thanks to the await start), our signaler may still be indefinitely
> -	 * delayed by waiting on a semaphore from a remote engine. If our
> -	 * signaler depends on a semaphore, so indirectly do we, and we do not
> -	 * want to start our payload until our signaler also starts theirs.
> -	 * So we wait.
> -	 *
> -	 * However, there is also a second condition for which we need to wait
> -	 * for the precise start of the signaler. Consider that the signaler
> -	 * was submitted in a chain of requests following another context
> -	 * (with just an ordinary intra-engine fence dependency between the
> -	 * two). In this case the signaler is queued to HW, but not for
> -	 * immediate execution, and so we must wait until it reaches the
> -	 * active slot.
> -	 */
> -	if (intel_engine_has_semaphores(to->engine) &&
> -	    !i915_request_has_initial_breadcrumb(to)) {
> -		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
> -		if (err < 0)
> -			return err;
>   	}
>   
> -	/* Couple the dependency tree for PI on this exposed to->fence */
>   	if (to->engine->schedule) {
> -		err = i915_sched_node_add_dependency(&to->sched,
> +		ret = i915_sched_node_add_dependency(&to->sched,
>   						     &from->sched,
> -						     I915_DEPENDENCY_WEAK);
> -		if (err < 0)
> -			return err;
> +						     I915_DEPENDENCY_EXTERNAL);
> +		if (ret < 0)
> +			return ret;
>   	}
>   
> -	return intel_timeline_sync_set_start(i915_request_timeline(to),
> -					     &from->fence);
> +	if (to->engine == READ_ONCE(from->engine))
> +		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
> +						       &from->submit,
> +						       I915_FENCE_GFP);
> +	else
> +		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
>   }
>   
>   int
> -i915_request_await_execution(struct i915_request *rq,
> -			     struct dma_fence *fence,
> -			     void (*hook)(struct i915_request *rq,
> -					  struct dma_fence *signal))
> +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   {
>   	struct dma_fence **child = &fence;
>   	unsigned int nchild = 1;
>   	int ret;
>   
> +	/*
> +	 * Note that if the fence-array was created in signal-on-any mode,
> +	 * we should *not* decompose it into its individual fences. However,
> +	 * we don't currently store which mode the fence-array is operating
> +	 * in. Fortunately, the only user of signal-on-any is private to
> +	 * amdgpu and we should not see any incoming fence-array from
> +	 * sync-file being in signal-on-any mode.
> +	 */
>   	if (dma_fence_is_array(fence)) {
>   		struct dma_fence_array *array = to_dma_fence_array(fence);
>   
> -		/* XXX Error for signal-on-any fence arrays */
> -
>   		child = array->fences;
>   		nchild = array->num_fences;
>   		GEM_BUG_ON(!nchild);
> @@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
>   			continue;
>   		}
>   
> +		/*
> +		 * Requests on the same timeline are explicitly ordered, along
> +		 * with their dependencies, by i915_request_add() which ensures
> +		 * that requests are submitted in-order through each ring.
> +		 */
>   		if (fence->context == rq->fence.context)
>   			continue;
>   
> -		/*
> -		 * We don't squash repeated fence dependencies here as we
> -		 * want to run our callback in all cases.
> -		 */
> +		/* Squash repeated waits to the same timelines */
> +		if (fence->context &&
> +		    intel_timeline_sync_is_later(i915_request_timeline(rq),
> +						 fence))
> +			continue;
>   
>   		if (dma_fence_is_i915(fence))
> -			ret = __i915_request_await_execution(rq,
> -							     to_request(fence),
> -							     hook);
> +			ret = i915_request_await_request(rq, to_request(fence));
>   		else
>   			ret = i915_request_await_external(rq, fence);
>   		if (ret < 0)
>   			return ret;
> +
> +		/* Record the latest fence used against each timeline */
> +		if (fence->context)
> +			intel_timeline_sync_set(i915_request_timeline(rq),
> +						fence);
>   	} while (--nchild);
>   
>   	return 0;
> 
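One detail in the hunks above deserves a note: the
intel_timeline_sync_has_start()/intel_timeline_sync_set_start() helpers
track the *start* of a request by recording fence->seqno - 1. A toy
single-context model of why that works (the real helpers keep one seqno
per fence context; everything here is a simplified stand-in):

#include <stdbool.h>
#include <stdint.h>

static uint32_t latest_awaited;	/* toy: one context instead of a map */

static bool sync_is_later(uint32_t seqno)
{
	/* wraparound-safe "have we already awaited this or later?" */
	return (int32_t)(latest_awaited - seqno) >= 0;
}

static void sync_set(uint32_t seqno)
{
	if (!sync_is_later(seqno))
		latest_awaited = seqno;
}

/*
 * Requests on a timeline carry consecutive seqnos, so seqno - 1
 * signalling means the previous request completed, i.e. this request
 * has started. Recording seqno - 1 squashes repeated start-awaits on
 * the same timeline without ever claiming the request has finished.
 */
static bool has_start(uint32_t fence_seqno)
{
	return sync_is_later(fence_seqno - 1);
}

static void set_start(uint32_t fence_seqno)
{
	sync_set(fence_seqno - 1);
}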

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
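Also worth spelling out is why i915_request_await_execution() drops the
timeline squash that i915_request_await_dma_fence() keeps: the hook must
run for every signaler. A toy illustration (only the hook's shape follows
the diff above; the types and main() are made up):

#include <stdio.h>

struct toy_request { int id; };
struct toy_fence { int context; unsigned int seqno; };

/* Same shape as the hook parameter in the diff above. */
typedef void (*await_hook)(struct toy_request *rq,
			   struct toy_fence *signal);

static void bond_hook(struct toy_request *rq, struct toy_fence *signal)
{
	printf("rq %d: hook for context %d, seqno %u\n",
	       rq->id, signal->context, signal->seqno);
}

int main(void)
{
	struct toy_request rq = { .id = 1 };
	struct toy_fence a = { .context = 7, .seqno = 2 };
	struct toy_fence b = { .context = 7, .seqno = 3 };
	await_hook hook = bond_hook;

	/*
	 * Two fences from the same context: the dma-fence path may
	 * squash the second await as already-ordered, but then the
	 * hook would fire only once. The execution path therefore
	 * awaits each fence individually.
	 */
	hook(&rq, &a);
	hook(&rq, &b);
	return 0;
}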

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-27  7:32         ` Tvrtko Ursulin
@ 2020-05-27  7:47           ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2020-05-27  7:47 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: stable

Quoting Tvrtko Ursulin (2020-05-27 08:32:05)
> 
> On 27/05/2020 08:03, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-05-27 07:51:44)
> >>
> >> On 26/05/2020 10:07, Chris Wilson wrote:
> >>> When we push a virtual request onto the HW, we update the rq->engine to
> >>> point to the physical engine. A request that is then submitted by the
> >>> user that waits upon the virtual engine, but along the physical engine
> >>> in use, will then see that it is due to be submitted to the same engine
> >>> and take a shortcut (and be queued without waiting for the completion
> >>> fence). However, the virtual request may be preempted (either by higher
> >>> priority users, or by timeslicing) and removed from the physical engine
> >>> to be migrated over to one of its siblings. The dependent normal request
> >>> however is oblivious to the removal of the virtual request and remains
> >>> queued to execute on HW, believing that once it reaches the head of its
> >>> queue all of its predecessors will have completed executing!
> >>>
> >>> v2: Beware restriction of signal->execution_mask prior to submission.
> >>>
> >>> Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
> >>> Testcase: igt/gem_exec_balancer/sliced
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> Cc: <stable@vger.kernel.org> # v5.3+
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
> >>>    1 file changed, 21 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> >>> index 33bbad623e02..0b07ccc7e9bc 100644
> >>> --- a/drivers/gpu/drm/i915/i915_request.c
> >>> +++ b/drivers/gpu/drm/i915/i915_request.c
> >>> @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
> >>>        return 0;
> >>>    }
> >>>    
> >>> +static int
> >>> +await_request_submit(struct i915_request *to, struct i915_request *from)
> >>> +{
> >>> +     /*
> >>> +      * If we are waiting on a virtual engine, then it may be
> >>> +      * constrained to execute on a single engine *prior* to submission.
> >>> +      * When it is submitted, it will be first submitted to the virtual
> >>> +      * engine and then passed to the physical engine. We cannot allow
> >>> +      * the waiter to be submitted immediately to the physical engine
> >>> +      * as it may then bypass the virtual request.
> >>> +      */
> >>> +     if (to->engine == READ_ONCE(from->engine))
> >>> +             return i915_sw_fence_await_sw_fence_gfp(&to->submit,
> >>> +                                                     &from->submit,
> >>> +                                                     I915_FENCE_GFP);
> >>> +     else
> >>> +             return __i915_request_await_execution(to, from, NULL);
> >>
> >> If I am following correctly, this branch will be the virtual <-> physical
> >> or virtual <-> virtual dependency on the same physical engine. Why is
> >> await_execution sufficient in this case? Is there something preventing
> >> timeslicing between the two (which we do not want, right?) once 'from'
> >> starts execution (as opposed to finishing)?
> > 
> > Timeslicing is only between independent requests. When we expire a
> > request, we also expire all of its waiters along the same engine, so
> > that the execution order remains intact.
> 
> > Via scheduler dependencies - are they enough?

Yes.
-Chris
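A rough sketch of what "expire all of its waiters along the same engine"
could look like, assuming a scheduler that keeps a per-request waiter
list (all names here are hypothetical, not the i915 implementation):

#include <stddef.h>

struct toy_node {
	struct toy_node *waiters[4];	/* requests that depend on us */
	int nwaiters;
	int engine;
	int deferred;
};

/*
 * When 'rq' is timesliced out, push back every waiter queued to the
 * same engine as well, so a dependent request can never overtake the
 * request it awaited. This is what keeps execution order intact even
 * though the dependent was allowed onto the engine queue early.
 */
static void defer_with_waiters(struct toy_node *rq, int engine)
{
	int i;

	rq->deferred = 1;
	for (i = 0; i < rq->nwaiters; i++) {
		struct toy_node *w = rq->waiters[i];

		if (w->engine == engine && !w->deferred)
			defer_with_waiters(w, engine);
	}
}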

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual
  2020-05-27  7:47           ` Chris Wilson
@ 2020-05-27  7:50             ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2020-05-27  7:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 27/05/2020 08:47, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-05-27 08:32:05)
>>
>> On 27/05/2020 08:03, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2020-05-27 07:51:44)
>>>>
>>>> On 26/05/2020 10:07, Chris Wilson wrote:
>>>>> When we push a virtual request onto the HW, we update the rq->engine to
>>>>> point to the physical engine. A request that is then submitted by the
>>>>> user that waits upon the virtual engine, but along the physical engine
>>>>> in use, will then see that it is due to be submitted to the same engine
>>>>> and take a shortcut (and be queued without waiting for the completion
>>>>> fence). However, the virtual request may be preempted (either by higher
>>>>> priority users, or by timeslicing) and removed from the physical engine
>>>>> to be migrated over to one of its siblings. The dependent normal request
>>>>> however is oblivious to the removal of the virtual request and remains
>>>>> queued to execute on HW, believing that once it reaches the head of its
>>>>> queue all of its predecessors will have completed executing!
>>>>>
>>>>> v2: Beware restriction of signal->execution_mask prior to submission.
>>>>>
>>>>> Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine")
>>>>> Testcase: igt/gem_exec_balancer/sliced
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: <stable@vger.kernel.org> # v5.3+
>>>>> ---
>>>>>     drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++----
>>>>>     1 file changed, 21 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>>>>> index 33bbad623e02..0b07ccc7e9bc 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_request.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_request.c
>>>>> @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq,
>>>>>         return 0;
>>>>>     }
>>>>>     
>>>>> +static int
>>>>> +await_request_submit(struct i915_request *to, struct i915_request *from)
>>>>> +{
>>>>> +     /*
>>>>> +      * If we are waiting on a virtual engine, then it may be
>>>>> +      * constrained to execute on a single engine *prior* to submission.
>>>>> +      * When it is submitted, it will be first submitted to the virtual
>>>>> +      * engine and then passed to the physical engine. We cannot allow
>>>>> +      * the waiter to be submitted immediately to the physical engine
>>>>> +      * as it may then bypass the virtual request.
>>>>> +      */
>>>>> +     if (to->engine == READ_ONCE(from->engine))
>>>>> +             return i915_sw_fence_await_sw_fence_gfp(&to->submit,
>>>>> +                                                     &from->submit,
>>>>> +                                                     I915_FENCE_GFP);
>>>>> +     else
>>>>> +             return __i915_request_await_execution(to, from, NULL);
>>>>
>>>> If I am following correctly, this branch will be the virtual <-> physical
>>>> or virtual <-> virtual dependency on the same physical engine. Why is
>>>> await_execution sufficient in this case? Is there something preventing
>>>> timeslicing between the two (which we do not want, right?) once 'from'
>>>> starts execution (as opposed to finishing)?
>>>
>>> Timeslicing is only between independent requests. When we expire a
>>> request, we also expire all of its waiters along the same engine, so
>>> that the execution order remains intact.
>>
>> Via scheduler dependencies - are they enough?
> 
> Yes.

Okay, I really need to use all of this more often rather than just review it...

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko



^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2020-05-27  7:50 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-26  9:07 [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request Chris Wilson
2020-05-26  9:07 ` [PATCH 2/2] drm/i915/gt: Do not schedule normal requests immediately along virtual Chris Wilson
2020-05-26  9:07   ` [Intel-gfx] " Chris Wilson
2020-05-26 16:00   ` Tvrtko Ursulin
2020-05-26 16:00     ` Tvrtko Ursulin
2020-05-26 16:09     ` Chris Wilson
2020-05-26 16:09       ` Chris Wilson
2020-05-27  6:51   ` Tvrtko Ursulin
2020-05-27  6:51     ` Tvrtko Ursulin
2020-05-27  7:03     ` Chris Wilson
2020-05-27  7:03       ` Chris Wilson
2020-05-27  7:32       ` Tvrtko Ursulin
2020-05-27  7:32         ` Tvrtko Ursulin
2020-05-27  7:47         ` Chris Wilson
2020-05-27  7:47           ` Chris Wilson
2020-05-27  7:50           ` Tvrtko Ursulin
2020-05-27  7:50             ` Tvrtko Ursulin
2020-05-26 10:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Reorder await_execution before await_request Patchwork
2020-05-26 12:55 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2020-05-27  7:39 ` [Intel-gfx] [PATCH 1/2] " Tvrtko Ursulin
