* [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling
@ 2019-04-29 18:00 Chris Wilson
  2019-04-29 18:00 ` [PATCH 2/5] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-29 18:00 UTC (permalink / raw)
  To: intel-gfx

When the system is idling, contention for struct_mutex should be low,
and so it is more efficient to wait for a contended mutex than to
reschedule.
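
As a rough userspace sketch of the tradeoff (pthreads standing in for
the kernel mutex API; illustrative only, the 50ms requeue is modelled
as a plain retry hint):

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t struct_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Old behaviour: if contended, give up and ask to be requeued later. */
int idle_work_trylock(void)
{
        if (pthread_mutex_trylock(&struct_mutex) != 0)
                return -EAGAIN; /* caller requeues the work ~50ms later */
        /* ... park the hardware ... */
        pthread_mutex_unlock(&struct_mutex);
        return 0;
}

/* New behaviour: contention is expected to be rare while idling,
 * so simply block until the mutex is free. */
void idle_work_lock(void)
{
        pthread_mutex_lock(&struct_mutex);
        /* ... park the hardware ... */
        pthread_mutex_unlock(&struct_mutex);
}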

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_pm.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
index 3554d55dae35..3b6e8d5be8e1 100644
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/i915_gem_pm.c
@@ -47,13 +47,7 @@ static void idle_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gem.idle_work.work);
 
-	if (!mutex_trylock(&i915->drm.struct_mutex)) {
-		/* Currently busy, come back later */
-		mod_delayed_work(i915->wq,
-				 &i915->gem.idle_work,
-				 msecs_to_jiffies(50));
-		return;
-	}
+	mutex_lock(&i915->drm.struct_mutex);
 
 	intel_wakeref_lock(&i915->gt.wakeref);
 	if (!intel_wakeref_active(&i915->gt.wakeref))
-- 
2.20.1

* [PATCH 2/5] drm/i915: Only reschedule the submission tasklet if preemption is possible
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
@ 2019-04-29 18:00 ` Chris Wilson
  2019-04-30  9:35   ` [PATCH v2] " Chris Wilson
  2019-04-29 18:00 ` [PATCH 3/5] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 18+ messages in thread
From: Chris Wilson @ 2019-04-29 18:00 UTC (permalink / raw)
  To: intel-gfx

If we couple the scheduler more tightly with the execlists policy, we
can apply the preemption policy to the question of whether we need to
kick the tasklet at all for this priority bump.
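
The predicate itself is tiny; a standalone sketch of the check
(mirroring __execlists_need_preempt in the diffs below, with a
stand-in value for I915_PRIORITY_NORMAL):

#include <stdbool.h>

#define PRIORITY_NORMAL 0 /* stand-in for I915_PRIORITY_NORMAL */

/* Only kick the tasklet if the bumped priority could preempt the
 * request currently in the submission port; equal priority keeps
 * FIFO ordering, so the comparison is strictly greater-than. */
static bool need_preempt(int prio, int active)
{
        int floor = active > PRIORITY_NORMAL - 1 ? active
                                                 : PRIORITY_NORMAL - 1;

        return prio > floor;
}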

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c |  7 ++++++-
 drivers/gpu/drm/i915/i915_request.c    |  2 --
 drivers/gpu/drm/i915/i915_scheduler.c  | 18 +++++++++++-------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 84538f69185b..4b042893dc0e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
-	i915_sw_fence_commit(&rq->submit);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
 
 static void dummy_request_free(struct i915_request *dummy)
 {
+	/* We have to fake the CS interrupt to kick the next request */
+	i915_sw_fence_commit(&dummy->submit);
+
 	i915_request_mark_complete(dummy);
+	dma_fence_signal(&dummy->fence);
+
 	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index af8c9fa5e066..2e22da66a56c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
-		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..4913418387be 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
 	return engine;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	const struct i915_request *active;
+	return rq->sched.attr.priority | __NO_PREEMPTION;
+}
+
+static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
+{
+	const struct i915_request *inflight =
+		port_request(engine->execlists.port);
 
-	if (!i915_request_is_active(rq))
+	if (!inflight)
 		return false;
 
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
+	return __execlists_need_preempt(prio, rq_prio(inflight));
 }
 
 static void __i915_schedule(struct i915_request *rq,
@@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (inflight(node_to_request(node), engine))
+		if (!kick_tasklet(engine, prio))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
-- 
2.20.1

* [PATCH 3/5] drm/i915: Delay semaphore submission until the start of the signaler
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
  2019-04-29 18:00 ` [PATCH 2/5] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
@ 2019-04-29 18:00 ` Chris Wilson
  2019-04-29 18:00 ` [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-29 18:00 UTC (permalink / raw)
  To: intel-gfx

Currently we submit the semaphore busywait as soon as the signaler is
submitted to HW. However, we may submit the signaler as the tail of a
batch of requests, and not even as the first context in the HW list,
i.e. the busywait may start spinning far in advance of the signaler
even starting.

If we wait for the request preceding the signaler to complete before
submitting the busywait, we prevent the busywait from starting too
early when the signaler is not first in the submission port.

To handle the case where the signaler is at the start of the second (or
later) submission port, we will need to delay the execution callback
until we know the context is promoted to port0. A challenge for later.
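
A userspace model of the ordering rule (illustrative only; the real
logic is i915_request_await_start in the diff below):

#include <stddef.h>

struct request {
        struct request *ring_prev;      /* request submitted before us */
        int completed;
};

/*
 * The busywait must not be emitted until everything ahead of the
 * signaler in its ring has completed: return the request the waiter
 * should await first, or NULL if the signaler is already first in
 * its ring (or its predecessor has finished).
 */
static struct request *await_start_barrier(struct request *signal)
{
        struct request *prev = signal->ring_prev;

        if (!prev || prev->completed)
                return NULL;

        return prev;
}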

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2e22da66a56c..8cb3ed5531e3 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -770,6 +770,21 @@ i915_request_create(struct intel_context *ce)
 	return rq;
 }
 
+static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+		return 0;
+
+	signal = list_prev_entry(signal, ring_link);
+	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+		return 0;
+
+	return i915_sw_fence_await_dma_fence(&rq->submit,
+					     &signal->fence, 0,
+					     I915_FENCE_GFP);
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -788,6 +803,10 @@ emit_semaphore_wait(struct i915_request *to,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
 	err = i915_sw_fence_await_dma_fence(&to->semaphore,
 					    &from->fence, 0,
 					    I915_FENCE_GFP);
-- 
2.20.1

* [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
  2019-04-29 18:00 ` [PATCH 2/5] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
  2019-04-29 18:00 ` [PATCH 3/5] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
@ 2019-04-29 18:00 ` Chris Wilson
  2019-04-30  8:55   ` Tvrtko Ursulin
                     ` (2 more replies)
  2019-04-29 18:00 ` [PATCH 5/5] drm/i915/execlists: Don't apply priority boost for resets Chris Wilson
                   ` (5 subsequent siblings)
  8 siblings, 3 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-29 18:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Dmitry Ermilov

Asking the GPU to busywait on a memory address, perhaps not unexpectedly
in hindsight for a shared system, leads to bus contention that affects
CPU programs trying to concurrently access memory. This can manifest as
a drop in transcode throughput on highly over-saturated workloads.

The only clue offered by perf is that the bus-cycles (perf stat -e
bus-cycles) jumped by 50% when enabling semaphores. This corresponds
with extra CPU active cycles being attributed to intel_idle's mwait.
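
For reference, the measurement is of the form (the workload command
here is a placeholder):

  $ perf stat -e bus-cycles -- <transcode-workload>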

This patch introduces a heuristic to try to detect when more than one
client is submitting to the GPU, pushing it into an oversaturated
state. As we already keep track of when the semaphores are signaled, we
can inspect their state on submitting the busywait batch and, if we
planned to use a semaphore but were too late, conclude that the GPU is
overloaded and not try to use semaphores in future requests. In
practice, this means we optimistically try to use semaphores for the
first frame of a transcode job split over multiple engines, fail if
there are multiple clients active, and continue not to use semaphores
for the subsequent frames in the sequence. Periodically, we
optimistically try switching semaphores back on whenever the client
waits to catch up with the transcode results.
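
The heuristic in miniature, with engine masks modelled as plain
bitmasks (the real fields are in the diff below; the saturated mask is
cleared again when the context idles):

typedef unsigned int engine_mask_t;

struct context {
        engine_mask_t saturated;        /* cleared when the context idles */
};

struct request {
        struct context *ctx;
        engine_mask_t semaphores;       /* engines we emitted busywaits for */
        int semaphore_signaled;         /* signaler finished before submit? */
};

/* On submission: a semaphore that has already signaled means we were
 * too late, so mark those engines as saturated for this context. */
static void note_saturation(struct request *rq)
{
        if (rq->semaphores && rq->semaphore_signaled)
                rq->ctx->saturated |= rq->semaphores;
}

/* On emission: skip the busywait for engines already busywaiting or
 * marked saturated, falling back to a software fence instead. */
static int may_busywait(const struct request *rq, engine_mask_t engine)
{
        return !((rq->semaphores | rq->ctx->saturated) & engine);
}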

With 1 client, on Broxton J3455, with the relative fps normalized by %cpu:

x no semaphores
+ drm-tip
* throttle
+------------------------------------------------------------------------+
|                                                    *                   |
|                                                    *+                  |
|                                                    **+                 |
|                                                    **+  x              |
|                                x               *  +**+  x              |
|                                x  x       *    *  +***x xx             |
|                                x  x       *    * *+***x *x             |
|                                x  x*   +  *    * *****x *x x           |
|                         +    x xx+x*   + ***   * ********* x   *       |
|                         +    x xx+x*   * *** +** ********* xx  *       |
|    *   +         ++++*  +    x*x****+*+* ***+*************+x*  *       |
|*+ +** *+ + +* + *++****** *xxx**********x***+*****************+*++    *|
|                                   |__________A_____M_____|             |
|                           |_______________A____M_________|             |
|                                 |____________A___M________|            |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 120       2.60475       3.50941       3.31123     3.2143953    0.21117399
+ 120        2.3826       3.57077       3.25101     3.1414161    0.28146407
Difference at 95.0% confidence
	-0.0729792 +/- 0.0629585
	-2.27039% +/- 1.95864%
	(Student's t, pooled s = 0.248814)
* 120       2.35536       3.66713        3.2849     3.2059917    0.24618565
No difference proven at 95.0% confidence

With 10 clients over-saturating the pipeline:

x no semaphores
+ drm-tip
* patch
+------------------------------------------------------------------------+
|                     ++                                        **       |
|                     ++                                        **       |
|                     ++                                        **       |
|                     ++                                        **       |
|                     ++                                    xx ***       |
|                     ++                                    xx ***       |
|                     ++                                    xxx***       |
|                     ++                                    xxx***       |
|                    +++                                    xxx***       |
|                    +++                                    xx****       |
|                    +++                                    xx****       |
|                    +++                                    xx****       |
|                    +++                                    xx****       |
|                    ++++                                   xx****       |
|                   +++++                                   xx****       |
|                   +++++                                 x x******      |
|                  ++++++                                 xxx*******     |
|                  ++++++                                 xxx*******     |
|                  ++++++                                 xxx*******     |
|                  ++++++                                 xx********     |
|                  ++++++                               xxxx********     |
|                  ++++++                               xxxx********     |
|                ++++++++                             xxxxx*********     |
|+ +  +        + ++++++++                           xxx*xx**********x*  *|
|                                                         |__A__|        |
|                 |__AM__|                                               |
|                                                            |__A_|      |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 120       2.47855        2.8972       2.72376     2.7193402   0.074604933
+ 120       1.17367       1.77459       1.71977     1.6966782   0.085850697
Difference at 95.0% confidence
	-1.02266 +/- 0.0203502
	-37.607% +/- 0.748352%
	(Student's t, pooled s = 0.0804246)
* 120       2.57868       3.00821       2.80142     2.7923878   0.058646477
Difference at 95.0% confidence
	0.0730476 +/- 0.0169791
	2.68622% +/- 0.624383%
	(Student's t, pooled s = 0.0671018)

This indicates that we've recovered the regression from enabling
semaphores on this saturated setup, with a hint towards an overall
improvement.

Very similar, but of smaller magnitude, results are observed on both
Skylake(gt2) and Kabylake(gt4). This may be due to the reduced impact of
bus-cycles: where we see a 50% hit on Broxton, it is only 10% on the big
core, in this particular test.

One observation to make here is that for a greedy client trying to
maximise its own throughput, using semaphores is the right choice. It is
only in the holistic system-wide view, where the semaphores of one client
impact another and reduce the overall throughput, that we would choose
to disable semaphores.

The most noticeable negative impact this has is on the no-op
microbenchmarks, which are also very notable for having no cpu bus load.
In particular, this increases the runtime and energy consumption of
gem_exec_whisper.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  3 ++
 drivers/gpu/drm/i915/i915_request.c           | 28 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 1f1761fc6597..5b31e1e05ddd 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -116,6 +116,7 @@ intel_context_init(struct intel_context *ce,
 	ce->engine = engine;
 	ce->ops = engine->cops;
 	ce->sseu = engine->sseu;
+	ce->saturated = 0;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
@@ -158,6 +159,7 @@ void intel_context_enter_engine(struct intel_context *ce)
 
 void intel_context_exit_engine(struct intel_context *ce)
 {
+	ce->saturated = 0;
 	intel_engine_pm_put(ce->engine);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d5a7dbd0daee..963a312430e6 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "intel_engine_types.h"
 #include "intel_sseu.h"
 
 struct i915_gem_context;
@@ -52,6 +53,8 @@ struct intel_context {
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
+	intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
 	/**
 	 * active_tracker: Active tracker for the external rq activity
 	 * on this intel_context object.
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8cb3ed5531e3..2d429967f403 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -410,6 +410,26 @@ void __i915_request_submit(struct i915_request *request)
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
+	/*
+	 * Are we using semaphores when the gpu is already saturated?
+	 *
+	 * Using semaphores incurs a cost in having the GPU poll a
+	 * memory location, busywaiting for it to change. The continual
+	 * memory reads can have a noticeable impact on the rest of the
+	 * system with the extra bus traffic, stalling the cpu as it too
+	 * tries to access memory across the bus (perf stat -e bus-cycles).
+	 *
+	 * If we installed a semaphore on this request and we only submit
+	 * the request after the signaler completed, that indicates the
+	 * system is overloaded and using semaphores at this time only
+	 * increases the amount of work we are doing. If so, we disable
+	 * further use of semaphores until we are idle again, whence we
+	 * optimistically try again.
+	 */
+	if (request->sched.semaphores &&
+	    i915_sw_fence_signaled(&request->semaphore))
+		request->hw_context->saturated |= request->sched.semaphores;
+
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
@@ -785,6 +805,12 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 					     I915_FENCE_GFP);
 }
 
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+	return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -798,7 +824,7 @@ emit_semaphore_wait(struct i915_request *to,
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
 	/* Just emit the first semaphore we see as request space is limited. */
-	if (to->sched.semaphores & from->engine->mask)
+	if (already_busywaiting(to) & from->engine->mask)
 		return i915_sw_fence_await_dma_fence(&to->submit,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
-- 
2.20.1

* [PATCH 5/5] drm/i915/execlists: Don't apply priority boost for resets
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
                   ` (2 preceding siblings ...)
  2019-04-29 18:00 ` [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems Chris Wilson
@ 2019-04-29 18:00 ` Chris Wilson
  2019-04-29 18:08 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling Patchwork
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-29 18:00 UTC (permalink / raw)
  To: intel-gfx

Do not treat reset as a normal preemption event and avoid giving the
guilty request a priority boost for simply being active at the time of
reset.
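
A sketch of the resulting replay priority (cf. the boost parameter
added to __unwind_incomplete_requests in the diff; boost is
ACTIVE_PRIORITY when unwinding for preemption and 0 for a reset):

static int replay_prio(int prio, int boost, int has_started)
{
        /* Preemption: a started request jumps the queue on replay.
         * Reset: boost == 0, so the guilty request keeps its
         * original priority. */
        if (~prio & boost && has_started)
                prio |= boost;

        return prio;
}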

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 01f58a152a9e..d7fc21535be2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -370,11 +370,11 @@ static void unwind_wa_tail(struct i915_request *rq)
 }
 
 static struct i915_request *
-__unwind_incomplete_requests(struct intel_engine_cs *engine)
+__unwind_incomplete_requests(struct intel_engine_cs *engine, int boost)
 {
 	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
+	int prio = I915_PRIORITY_INVALID | boost;
 
 	lockdep_assert_held(&engine->timeline.lock);
 
@@ -418,8 +418,9 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 * in the priority queue, but they will not gain immediate access to
 	 * the GPU.
 	 */
-	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
-		prio |= ACTIVE_PRIORITY;
+	if (~prio & boost && __i915_request_has_started(active)) {
+		prio |= boost;
+		GEM_BUG_ON(active->sched.attr.priority >= prio);
 		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
@@ -434,7 +435,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
 
-	return __unwind_incomplete_requests(engine);
+	return __unwind_incomplete_requests(engine, 0);
 }
 
 static inline void
@@ -655,7 +656,8 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 	execlists_cancel_port_requests(execlists);
 	__unwind_incomplete_requests(container_of(execlists,
 						  struct intel_engine_cs,
-						  execlists));
+						  execlists),
+				     ACTIVE_PRIORITY);
 }
 
 static void execlists_dequeue(struct intel_engine_cs *engine)
@@ -1908,7 +1910,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	execlists_cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	rq = __unwind_incomplete_requests(engine);
+	rq = __unwind_incomplete_requests(engine, 0);
 	if (!rq)
 		goto out_replay;
 
-- 
2.20.1

* ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
                   ` (3 preceding siblings ...)
  2019-04-29 18:00 ` [PATCH 5/5] drm/i915/execlists: Don't apply priority boost for resets Chris Wilson
@ 2019-04-29 18:08 ` Patchwork
  2019-04-30  7:39 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2019-04-29 18:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Wait for the struct_mutex on idling
URL   : https://patchwork.freedesktop.org/series/60072/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Wait for the struct_mutex on idling
Okay!

Commit: drm/i915: Only reschedule the submission tasklet if preemption is possible
+drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)

Commit: drm/i915: Delay semaphore submission until the start of the signaler
Okay!

Commit: drm/i915: Disable semaphore busywaits on saturated systems
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915/execlists: Don't apply priority boost for resets
Okay!

* ✓ Fi.CI.BAT: success for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
                   ` (4 preceding siblings ...)
  2019-04-29 18:08 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling Patchwork
@ 2019-04-30  7:39 ` Patchwork
  2019-04-30 11:12 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2019-04-30  7:39 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Wait for the struct_mutex on idling
URL   : https://patchwork.freedesktop.org/series/60072/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6012 -> Patchwork_12899
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60072/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12899:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_chamelium@dp-hpd-fast:
    - {fi-cml-u2}:        [FAIL][1] ([fdo#108767]) -> [SKIP][2] +8 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-cml-u2/igt@kms_chamelium@dp-hpd-fast.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-cml-u2/igt@kms_chamelium@dp-hpd-fast.html

  
Known issues
------------

  Here are the changes found in Patchwork_12899 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_contexts:
    - fi-skl-gvtdvm:      [PASS][3] -> [DMESG-FAIL][4] ([fdo#110235])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-skl-gvtdvm/igt@i915_selftest@live_contexts.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-skl-gvtdvm/igt@i915_selftest@live_contexts.html

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       [PASS][5] -> [INCOMPLETE][6] ([fdo#108602] / [fdo#108744])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-kbl-7500u:       [PASS][7] -> [DMESG-WARN][8] ([fdo#103841])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size:
    - fi-glk-dsi:         [PASS][9] -> [INCOMPLETE][10] ([fdo#103359] / [k.org#198133])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-glk-dsi/igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-glk-dsi/igt@kms_cursor_legacy@basic-flip-after-cursor-varying-size.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-blb-e6850:       [PASS][11] -> [INCOMPLETE][12] ([fdo#107718])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-blb-e6850/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Possible fixes ####

  * igt@amdgpu/amd_basic@userptr:
    - fi-kbl-8809g:       [DMESG-WARN][13] ([fdo#108965]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-kbl-8809g/igt@amdgpu/amd_basic@userptr.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-kbl-8809g/igt@amdgpu/amd_basic@userptr.html

  * igt@gem_exec_basic@basic-blt:
    - {fi-icl-u2}:        [INCOMPLETE][15] ([fdo#107713] / [fdo#110246]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-icl-u2/igt@gem_exec_basic@basic-blt.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-icl-u2/igt@gem_exec_basic@basic-blt.html

  * igt@gem_exec_fence@basic-busy-default:
    - fi-icl-y:           [INCOMPLETE][17] ([fdo#107713]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/fi-icl-y/igt@gem_exec_fence@basic-busy-default.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/fi-icl-y/igt@gem_exec_fence@basic-busy-default.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103841]: https://bugs.freedesktop.org/show_bug.cgi?id=103841
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#108767]: https://bugs.freedesktop.org/show_bug.cgi?id=108767
  [fdo#108965]: https://bugs.freedesktop.org/show_bug.cgi?id=108965
  [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235
  [fdo#110246]: https://bugs.freedesktop.org/show_bug.cgi?id=110246
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (52 -> 44)
------------------------------

  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-bxt-dsi fi-bsw-n3050 fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6012 -> Patchwork_12899

  CI_DRM_6012: e4882f199157e3fb73d1791352931096f6ecfcfd @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4968: caed251990f35bfe45368f803980071a73e36315 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12899: 5ee78e3a3354cb0c235a681776696b7dbbbd173e @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

5ee78e3a3354 drm/i915/execlists: Don't apply priority boost for resets
c4e20db01287 drm/i915: Disable semaphore busywaits on saturated systems
d7f332cc342f drm/i915: Delay semaphore submission until the start of the signaler
bb407988cf13 drm/i915: Only reschedule the submission tasklet if preemption is possible
a67d0c84337f drm/i915: Wait for the struct_mutex on idling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-04-29 18:00 ` [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems Chris Wilson
@ 2019-04-30  8:55   ` Tvrtko Ursulin
  2019-04-30  9:04     ` Chris Wilson
  2019-04-30  9:07     ` Chris Wilson
  2019-05-03 13:25   ` Tvrtko Ursulin
  2019-05-03 14:04   ` Ville Syrjälä
  2 siblings, 2 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2019-04-30  8:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Dmitry Ermilov


On 29/04/2019 19:00, Chris Wilson wrote:
> Asking the GPU to busywait on a memory address, perhaps not unexpectedly
> in hindsight for a shared system, leads to bus contention that affects
> CPU programs trying to concurrently access memory. This can manifest as
> a drop in transcode throughput on highly over-saturated workloads.
> 
> The only clue offered by perf is that the bus-cycles (perf stat -e
> bus-cycles) jumped by 50% when enabling semaphores. This corresponds
> with extra CPU active cycles being attributed to intel_idle's mwait.
> 
> This patch introduces a heuristic to try to detect when more than one
> client is submitting to the GPU, pushing it into an oversaturated
> state. As we already keep track of when the semaphores are signaled, we
> can inspect their state on submitting the busywait batch and, if we
> planned to use a semaphore but were too late, conclude that the GPU is
> overloaded and not try to use semaphores in future requests. In
> practice, this means we optimistically try to use semaphores for the
> first frame of a transcode job split over multiple engines, fail if
> there are multiple clients active, and continue not to use semaphores
> for the subsequent frames in the sequence. Periodically, we
> optimistically try switching semaphores back on whenever the client
> waits to catch up with the transcode results.
> 

[snipped long benchmark results]

> This indicates that we've recovered the regression from enabling
> semaphores on this saturated setup, with a hint towards an overall
> improvement.
> 
> Very similar, but of smaller magnitude, results are observed on both
> Skylake(gt2) and Kabylake(gt4). This may be due to the reduced impact of
> bus-cycles: where we see a 50% hit on Broxton, it is only 10% on the big
> core, in this particular test.
> 
> One observation to make here is that for a greedy client trying to
> maximise its own throughput, using semaphores is the right choice. It is
> only in the holistic system-wide view, where the semaphores of one client
> impact another and reduce the overall throughput, that we would choose
> to disable semaphores.

Since we acknowledge the problem is the shared nature of the iGPU, my
concern is that we still cannot account for both partners here when
deciding to omit semaphore emission. In other words, we trade bus
throughput for submission latency.

Assuming a light GPU task (in the sense of not oversubscribing, but with
ping-pong inter-engine dependencies) running simultaneously with a
heavier CPU task, our latency improvement still imposes a performance
penalty on the latter.

For instance a consumer level single stream transcoding session with CPU 
heavy part of the pipeline, or a CPU intensive game.

(Ideally we would need a bus saturation signal to feed into our logic,
not just engine saturation, which I don't think is possible.)

So I am still leaning towards being cautious and just abandoning 
semaphores for now.

Regards,

Tvrtko

> The most noticeable negative impact this has is on the no-op
> microbenchmarks, which are also very notable for having no cpu bus load.
> In particular, this increases the runtime and energy consumption of
> gem_exec_whisper.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  3 ++
>   drivers/gpu/drm/i915/i915_request.c           | 28 ++++++++++++++++++-
>   3 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 1f1761fc6597..5b31e1e05ddd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -116,6 +116,7 @@ intel_context_init(struct intel_context *ce,
>   	ce->engine = engine;
>   	ce->ops = engine->cops;
>   	ce->sseu = engine->sseu;
> +	ce->saturated = 0;
>   
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
> @@ -158,6 +159,7 @@ void intel_context_enter_engine(struct intel_context *ce)
>   
>   void intel_context_exit_engine(struct intel_context *ce)
>   {
> +	ce->saturated = 0;
>   	intel_engine_pm_put(ce->engine);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index d5a7dbd0daee..963a312430e6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -13,6 +13,7 @@
>   #include <linux/types.h>
>   
>   #include "i915_active_types.h"
> +#include "intel_engine_types.h"
>   #include "intel_sseu.h"
>   
>   struct i915_gem_context;
> @@ -52,6 +53,8 @@ struct intel_context {
>   	atomic_t pin_count;
>   	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
>   
> +	intel_engine_mask_t saturated; /* submitting semaphores too late? */
> +
>   	/**
>   	 * active_tracker: Active tracker for the external rq activity
>   	 * on this intel_context object.
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 8cb3ed5531e3..2d429967f403 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -410,6 +410,26 @@ void __i915_request_submit(struct i915_request *request)
>   	if (i915_gem_context_is_banned(request->gem_context))
>   		i915_request_skip(request, -EIO);
>   
> +	/*
> +	 * Are we using semaphores when the gpu is already saturated?
> +	 *
> +	 * Using semaphores incurs a cost in having the GPU poll a
> +	 * memory location, busywaiting for it to change. The continual
> +	 * memory reads can have a noticeable impact on the rest of the
> +	 * system with the extra bus traffic, stalling the cpu as it too
> +	 * tries to access memory across the bus (perf stat -e bus-cycles).
> +	 *
> +	 * If we installed a semaphore on this request and we only submit
> +	 * the request after the signaler completed, that indicates the
> +	 * system is overloaded and using semaphores at this time only
> +	 * increases the amount of work we are doing. If so, we disable
> +	 * further use of semaphores until we are idle again, whence we
> +	 * optimistically try again.
> +	 */
> +	if (request->sched.semaphores &&
> +	    i915_sw_fence_signaled(&request->semaphore))
> +		request->hw_context->saturated |= request->sched.semaphores;
> +
>   	/* We may be recursing from the signal callback of another i915 fence */
>   	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>   
> @@ -785,6 +805,12 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
>   					     I915_FENCE_GFP);
>   }
>   
> +static intel_engine_mask_t
> +already_busywaiting(struct i915_request *rq)
> +{
> +	return rq->sched.semaphores | rq->hw_context->saturated;
> +}
> +
>   static int
>   emit_semaphore_wait(struct i915_request *to,
>   		    struct i915_request *from,
> @@ -798,7 +824,7 @@ emit_semaphore_wait(struct i915_request *to,
>   	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
>   
>   	/* Just emit the first semaphore we see as request space is limited. */
> -	if (to->sched.semaphores & from->engine->mask)
> +	if (already_busywaiting(to) & from->engine->mask)
>   		return i915_sw_fence_await_dma_fence(&to->submit,
>   						     &from->fence, 0,
>   						     I915_FENCE_GFP);
> 

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-04-30  8:55   ` Tvrtko Ursulin
@ 2019-04-30  9:04     ` Chris Wilson
  2019-04-30  9:07     ` Chris Wilson
  1 sibling, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-30  9:04 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Dmitry Ermilov

Quoting Tvrtko Ursulin (2019-04-30 09:55:59)
> 
> On 29/04/2019 19:00, Chris Wilson wrote:
> > Asking the GPU to busywait on a memory address, perhaps not unexpectedly
> > in hindsight for a shared system, leads to bus contention that affects
> > CPU programs trying to concurrently access memory. This can manifest as
> > a drop in transcode throughput on highly over-saturated workloads.
> > 
> > The only clue offered by perf is that the bus-cycles (perf stat -e
> > bus-cycles) jumped by 50% when enabling semaphores. This corresponds
> > with extra CPU active cycles being attributed to intel_idle's mwait.
> > 
> > This patch introduces a heuristic to try to detect when more than one
> > client is submitting to the GPU, pushing it into an oversaturated
> > state. As we already keep track of when the semaphores are signaled, we
> > can inspect their state on submitting the busywait batch and, if we
> > planned to use a semaphore but were too late, conclude that the GPU is
> > overloaded and not try to use semaphores in future requests. In
> > practice, this means we optimistically try to use semaphores for the
> > first frame of a transcode job split over multiple engines, fail if
> > there are multiple clients active, and continue not to use semaphores
> > for the subsequent frames in the sequence. Periodically, we
> > optimistically try switching semaphores back on whenever the client
> > waits to catch up with the transcode results.
> > 
> 
> [snipped long benchmark results]
> 
> > This indicates that we've recovered the regression from enabling
> > semaphores on this saturated setup, with a hint towards an overall
> > improvement.
> > 
> > Very similar, but of smaller magnitude, results are observed on both
> > Skylake(gt2) and Kabylake(gt4). This may be due to the reduced impact of
> > bus-cycles: where we see a 50% hit on Broxton, it is only 10% on the big
> > core, in this particular test.
> > 
> > One observation to make here is that for a greedy client trying to
> > maximise its own throughput, using semaphores is the right choice. It is
> > only in the holistic system-wide view, where the semaphores of one client
> > impact another and reduce the overall throughput, that we would choose
> > to disable semaphores.
> 
> Since we acknowledge problem is the shared nature of the iGPU, my 
> concern is that we still cannot account for both partners here when 
> deciding to omit semaphore emission. In other words we trade bus 
> throughput for submission latency.
> 
> Assuming a light GPU task (in the sense of not oversubscribing, but with 
> ping-pong inter-engine dependencies), simultaneous to a heavier CPU 
> task, our latency improvement still imposes a performance penalty on the 
> latter.

Maybe, maybe not. I think you have to be in the position where there is
no GPU latency to be gained before the increased bus traffic becomes a
loss.
 
> For instance a consumer level single stream transcoding session with CPU 
> heavy part of the pipeline, or a CPU intensive game.
> 
> (Ideally we would need a bus saturation signal to feed into our logic, 
> not just engine saturation. Which I don't think is possible.)
> 
> So I am still leaning towards being cautious and just abandoning 
> semaphores for now.

Being greedy, the single consumer case is compelling. The same
benchmarks see 5-10% throughput improvement for the single client
(depending on machine).
-Chris

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-04-30  8:55   ` Tvrtko Ursulin
  2019-04-30  9:04     ` Chris Wilson
@ 2019-04-30  9:07     ` Chris Wilson
  1 sibling, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-30  9:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Dmitry Ermilov

Quoting Tvrtko Ursulin (2019-04-30 09:55:59)
> 
> On 29/04/2019 19:00, Chris Wilson wrote:
> So I am still leaning towards being cautious and just abandoning 
> semaphores for now.

Fwiw, we have another 4 weeks to pull the plug for 5.2.
-Chris

* [PATCH v2] drm/i915: Only reschedule the submission tasklet if preemption is possible
  2019-04-29 18:00 ` [PATCH 2/5] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
@ 2019-04-30  9:35   ` Chris Wilson
  0 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2019-04-30  9:35 UTC (permalink / raw)
  To: intel-gfx

If we couple the scheduler more tightly with the execlists policy, we
can apply the preemption policy to the question of whether we need to
kick the tasklet at all for this priority bump.

v2: Rephrase it as a core i915 policy and not an execlists foible.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h      | 18 ------------------
 drivers/gpu/drm/i915/gt/intel_lrc.c         |  4 ++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c      |  7 ++++++-
 drivers/gpu/drm/i915/i915_request.c         |  2 --
 drivers/gpu/drm/i915/i915_scheduler.c       | 18 +++++++++++-------
 drivers/gpu/drm/i915/i915_scheduler.h       | 18 ++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 7 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 3e53f53bc52b..317712d6f7e6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
-static inline bool __execlists_need_preempt(int prio, int last)
-{
-	/*
-	 * Allow preemption of low -> normal -> high, but we do
-	 * not allow low priority tasks to preempt other low priority
-	 * tasks under the impression that latency for low priority
-	 * tasks does not matter (as much as background throughput),
-	 * so kiss.
-	 *
-	 * More naturally we would write
-	 *	prio >= max(0, last);
-	 * except that we wish to prevent triggering preemption at the same
-	 * priority level: the task that is running should remain running
-	 * to preserve FIFO ordering of dependencies.
-	 */
-	return prio > max(I915_PRIORITY_NORMAL - 1, last);
-}
-
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 01f58a152a9e..4cb87d96deb1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -251,8 +251,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * ourselves, ignore the request.
 	 */
 	last_prio = effective_prio(rq);
-	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
-				      last_prio))
+	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
+					 last_prio))
 		return false;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 84538f69185b..4b042893dc0e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -638,14 +638,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
-	i915_sw_fence_commit(&rq->submit);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
 
 static void dummy_request_free(struct i915_request *dummy)
 {
+	/* We have to fake the CS interrupt to kick the next request */
+	i915_sw_fence_commit(&dummy->submit);
+
 	i915_request_mark_complete(dummy);
+	dma_fence_signal(&dummy->fence);
+
 	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index af8c9fa5e066..2e22da66a56c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1358,9 +1358,7 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
-		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 39bc4f54e272..88d18600f5db 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -261,16 +261,20 @@ sched_lock_engine(const struct i915_sched_node *node,
 	return engine;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	const struct i915_request *active;
+	return rq->sched.attr.priority | __NO_PREEMPTION;
+}
+
+static bool kick_tasklet(const struct intel_engine_cs *engine, int prio)
+{
+	const struct i915_request *inflight =
+		port_request(engine->execlists.port);
 
-	if (!i915_request_is_active(rq))
+	if (!inflight)
 		return false;
 
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
+	return i915_scheduler_need_preempt(prio, rq_prio(inflight));
 }
 
 static void __i915_schedule(struct i915_request *rq,
@@ -400,7 +404,7 @@ static void __i915_schedule(struct i915_request *rq,
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (inflight(node_to_request(node), engine))
+		if (!kick_tasklet(engine, prio))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 07d243acf553..7eefccff39bf 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p)
 		__i915_priolist_free(p);
 }
 
+static inline bool i915_scheduler_need_preempt(int prio, int active)
+{
+	/*
+	 * Allow preemption of low -> normal -> high, but we do
+	 * not allow low priority tasks to preempt other low priority
+	 * tasks under the impression that latency for low priority
+	 * tasks does not matter (as much as background throughput),
+	 * so kiss.
+	 *
+	 * More naturally we would write
+	 *	prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
+	 */
+	return prio > max(I915_PRIORITY_NORMAL - 1, active);
+}
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4c814344809c..410393bb8559 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -746,7 +746,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (__execlists_need_preempt(prio, port_prio(port))) {
+			if (i915_scheduler_need_preempt(prio,
+							port_prio(port))) {
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_PREEMPT);
 				queue_work(engine->i915->guc.preempt_wq,
-- 
2.20.1

* ✗ Fi.CI.IGT: failure for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
                   ` (5 preceding siblings ...)
  2019-04-30  7:39 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-04-30 11:12 ` Patchwork
  2019-04-30 13:12 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2) Patchwork
  2019-04-30 13:27 ` ✗ Fi.CI.BAT: failure " Patchwork
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2019-04-30 11:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Wait for the struct_mutex on idling
URL   : https://patchwork.freedesktop.org/series/60072/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6012_full -> Patchwork_12899_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12899_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12899_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12899_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_caching@read-writes:
    - shard-skl:          [PASS][1] -> [INCOMPLETE][2] +5 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl9/igt@gem_caching@read-writes.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl2/igt@gem_caching@read-writes.html

  * igt@perf_pmu@busy-accuracy-2-vcs0:
    - shard-skl:          NOTRUN -> [INCOMPLETE][3] +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl6/igt@perf_pmu@busy-accuracy-2-vcs0.html

  
Known issues
------------

  Here are the changes found in Patchwork_12899_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@mock_requests:
    - shard-skl:          [PASS][4] -> [INCOMPLETE][5] ([fdo#110550])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl1/igt@i915_selftest@mock_requests.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl3/igt@i915_selftest@mock_requests.html

  * igt@kms_cursor_crc@cursor-128x42-onscreen:
    - shard-skl:          [PASS][6] -> [INCOMPLETE][7] ([fdo#108972]) +1 similar issue
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl3/igt@kms_cursor_crc@cursor-128x42-onscreen.html
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl10/igt@kms_cursor_crc@cursor-128x42-onscreen.html

  * igt@kms_cursor_crc@cursor-64x21-offscreen:
    - shard-skl:          [PASS][8] -> [FAIL][9] ([fdo#103232])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl2/igt@kms_cursor_crc@cursor-64x21-offscreen.html
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl6/igt@kms_cursor_crc@cursor-64x21-offscreen.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-mmap-wc-ytiled:
    - shard-skl:          [PASS][10] -> [FAIL][11] ([fdo#103184] / [fdo#103232])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl8/igt@kms_draw_crc@draw-method-xrgb2101010-mmap-wc-ytiled.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl6/igt@kms_draw_crc@draw-method-xrgb2101010-mmap-wc-ytiled.html

  * igt@kms_flip@2x-flip-vs-expired-vblank-interruptible:
    - shard-glk:          [PASS][12] -> [FAIL][13] ([fdo#105363])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-glk4/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-glk9/igt@kms_flip@2x-flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@2x-flip-vs-suspend:
    - shard-hsw:          [PASS][14] -> [INCOMPLETE][15] ([fdo#103540]) +1 similar issue
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-hsw5/igt@kms_flip@2x-flip-vs-suspend.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-hsw5/igt@kms_flip@2x-flip-vs-suspend.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-skl:          [PASS][16] -> [INCOMPLETE][17] ([fdo#109507])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl5/igt@kms_flip@flip-vs-suspend-interruptible.html
    - shard-snb:          [PASS][18] -> [INCOMPLETE][19] ([fdo#105411])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-snb5/igt@kms_flip@flip-vs-suspend-interruptible.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-snb1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-pwrite:
    - shard-iclb:         [PASS][20] -> [FAIL][21] ([fdo#103167]) +3 similar issues
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-pwrite.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-pwrite.html

  * igt@kms_frontbuffer_tracking@psr-suspend:
    - shard-skl:          [PASS][22] -> [INCOMPLETE][23] ([fdo#104108] / [fdo#106978])
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl5/igt@kms_frontbuffer_tracking@psr-suspend.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl5/igt@kms_frontbuffer_tracking@psr-suspend.html

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min:
    - shard-skl:          [PASS][24] -> [FAIL][25] ([fdo#108145])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][26] -> [FAIL][27] ([fdo#108145] / [fdo#110403])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl2/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl6/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [PASS][28] -> [SKIP][29] ([fdo#109441]) +1 similar issue
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-iclb3/igt@kms_psr@psr2_cursor_mmap_cpu.html

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          [PASS][30] -> [FAIL][31] ([fdo#109016])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-kbl7/igt@kms_rotation_crc@multiplane-rotation-cropping-top.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-kbl3/igt@kms_rotation_crc@multiplane-rotation-cropping-top.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][32] -> [FAIL][33] ([fdo#99912])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-kbl5/igt@kms_setmode@basic.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-kbl6/igt@kms_setmode@basic.html

  * igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend:
    - shard-iclb:         [PASS][34] -> [INCOMPLETE][35] ([fdo#107713]) +1 similar issue
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-iclb2/igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-iclb2/igt@kms_vblank@pipe-b-ts-continuation-dpms-suspend.html

  * igt@kms_vblank@pipe-c-ts-continuation-suspend:
    - shard-apl:          [PASS][36] -> [DMESG-WARN][37] ([fdo#108566]) +5 similar issues
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-apl5/igt@kms_vblank@pipe-c-ts-continuation-suspend.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-apl5/igt@kms_vblank@pipe-c-ts-continuation-suspend.html

  * igt@sw_sync@sync_multi_consumer:
    - shard-glk:          [PASS][38] -> [INCOMPLETE][39] ([fdo#103359] / [k.org#198133]) +5 similar issues
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-glk4/igt@sw_sync@sync_multi_consumer.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-glk9/igt@sw_sync@sync_multi_consumer.html

  
#### Possible fixes ####

  * igt@i915_suspend@debugfs-reader:
    - shard-apl:          [DMESG-WARN][40] ([fdo#108566]) -> [PASS][41] +8 similar issues
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-apl3/igt@i915_suspend@debugfs-reader.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-apl8/igt@i915_suspend@debugfs-reader.html

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-skl:          [INCOMPLETE][42] ([fdo#104108]) -> [PASS][43]
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl3/igt@kms_cursor_crc@cursor-64x64-suspend.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl4/igt@kms_cursor_crc@cursor-64x64-suspend.html

  * igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic:
    - shard-glk:          [FAIL][44] ([fdo#106509] / [fdo#107409]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-glk2/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-glk3/igt@kms_cursor_legacy@2x-nonblocking-modeset-vs-cursor-atomic.html

  * igt@kms_flip@modeset-vs-vblank-race:
    - shard-glk:          [FAIL][46] ([fdo#103060]) -> [PASS][47]
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-glk8/igt@kms_flip@modeset-vs-vblank-race.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-glk7/igt@kms_flip@modeset-vs-vblank-race.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw:
    - shard-iclb:         [FAIL][48] ([fdo#103167]) -> [PASS][49] +4 similar issues
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-iclb8/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-iclb3/igt@kms_frontbuffer_tracking@fbcpsr-1p-pri-indfb-multidraw.html

  * igt@kms_frontbuffer_tracking@fbcpsr-suspend:
    - shard-skl:          [INCOMPLETE][50] ([fdo#104108] / [fdo#106978]) -> [PASS][51]
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl7/igt@kms_frontbuffer_tracking@fbcpsr-suspend.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl3/igt@kms_frontbuffer_tracking@fbcpsr-suspend.html
    - shard-iclb:         [INCOMPLETE][52] ([fdo#106978] / [fdo#107713]) -> [PASS][53]
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-suspend.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-iclb8/igt@kms_frontbuffer_tracking@fbcpsr-suspend.html

  * igt@kms_plane@pixel-format-pipe-c-planes:
    - shard-glk:          [SKIP][54] ([fdo#109271]) -> [PASS][55] +1 similar issue
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-glk4/igt@kms_plane@pixel-format-pipe-c-planes.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-glk9/igt@kms_plane@pixel-format-pipe-c-planes.html

  * igt@kms_psr@psr2_cursor_mmap_gtt:
    - shard-iclb:         [SKIP][56] ([fdo#109441]) -> [PASS][57]
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-iclb7/igt@kms_psr@psr2_cursor_mmap_gtt.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_gtt.html

  
#### Warnings ####

  * igt@gem_exec_schedule@wide-bsd2:
    - shard-hsw:          [SKIP][58] ([fdo#109271]) -> [INCOMPLETE][59] ([fdo#103540])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-hsw5/igt@gem_exec_schedule@wide-bsd2.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-hsw8/igt@gem_exec_schedule@wide-bsd2.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-shrfb-plflip-blt:
    - shard-skl:          [FAIL][60] ([fdo#108040]) -> [FAIL][61] ([fdo#103167])
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6012/shard-skl8/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-shrfb-plflip-blt.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/shard-skl6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-shrfb-plflip-blt.html

  
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#106509]: https://bugs.freedesktop.org/show_bug.cgi?id=106509
  [fdo#106978]: https://bugs.freedesktop.org/show_bug.cgi?id=106978
  [fdo#107409]: https://bugs.freedesktop.org/show_bug.cgi?id=107409
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108972]: https://bugs.freedesktop.org/show_bug.cgi?id=108972
  [fdo#109016]: https://bugs.freedesktop.org/show_bug.cgi?id=109016
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109507]: https://bugs.freedesktop.org/show_bug.cgi?id=109507
  [fdo#110403]: https://bugs.freedesktop.org/show_bug.cgi?id=110403
  [fdo#110550]: https://bugs.freedesktop.org/show_bug.cgi?id=110550
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_6012 -> Patchwork_12899

  CI_DRM_6012: e4882f199157e3fb73d1791352931096f6ecfcfd @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4968: caed251990f35bfe45368f803980071a73e36315 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12899: 5ee78e3a3354cb0c235a681776696b7dbbbd173e @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12899/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2)
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
                   ` (6 preceding siblings ...)
  2019-04-30 11:12 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2019-04-30 13:12 ` Patchwork
  2019-04-30 13:27 ` ✗ Fi.CI.BAT: failure " Patchwork
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2019-04-30 13:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2)
URL   : https://patchwork.freedesktop.org/series/60072/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Wait for the struct_mutex on idling
Okay!

Commit: drm/i915: Only reschedule the submission tasklet if preemption is possible
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/gt/intel_engine.h:124:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_scheduler.h:70:23: warning: expression using sizeof(void)

Commit: drm/i915: Delay semaphore submission until the start of the signaler
Okay!

Commit: drm/i915: Disable semaphore busywaits on saturated systems
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915/execlists: Don't apply priority boost for resets
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2)
  2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
                   ` (7 preceding siblings ...)
  2019-04-30 13:12 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2) Patchwork
@ 2019-04-30 13:27 ` Patchwork
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2019-04-30 13:27 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2)
URL   : https://patchwork.freedesktop.org/series/60072/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6017 -> Patchwork_12906
========================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12906 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12906, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/60072/revisions/2/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12906:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-skl-6600u:       [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6017/fi-skl-6600u/igt@i915_selftest@live_execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/fi-skl-6600u/igt@i915_selftest@live_execlists.html

  
Known issues
------------

  Here are the changes found in Patchwork_12906 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@kms_chamelium@dp-crc-fast:
    - fi-kbl-7500u:       [PASS][3] -> [DMESG-WARN][4] ([fdo#103841])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6017/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - fi-apl-guc:         [PASS][5] -> [DMESG-WARN][6] ([fdo#110512])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6017/fi-apl-guc/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/fi-apl-guc/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-apl-guc:         [DMESG-WARN][7] ([fdo#110512]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6017/fi-apl-guc/igt@gem_exec_suspend@basic-s3.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/fi-apl-guc/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      [DMESG-FAIL][9] ([fdo#110235]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6017/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/fi-bdw-gvtdvm/igt@i915_selftest@live_contexts.html
    - fi-skl-gvtdvm:      [DMESG-FAIL][11] ([fdo#110235]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6017/fi-skl-gvtdvm/igt@i915_selftest@live_contexts.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/fi-skl-gvtdvm/igt@i915_selftest@live_contexts.html

  
  [fdo#103841]: https://bugs.freedesktop.org/show_bug.cgi?id=103841
  [fdo#110235]: https://bugs.freedesktop.org/show_bug.cgi?id=110235
  [fdo#110512]: https://bugs.freedesktop.org/show_bug.cgi?id=110512


Participating hosts (53 -> 42)
------------------------------

  Missing    (11): fi-kbl-soraka fi-ilk-m540 fi-bdw-5557u fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-gdg-551 fi-ivb-3770 fi-icl-y fi-byt-clapper 


Build changes
-------------

  * Linux: CI_DRM_6017 -> Patchwork_12906

  CI_DRM_6017: 69c3a37af9430650d1fc2a5555d4d0786898694d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4971: fc5e0467eb6913d21ad932aa8a31c77fdb5a9c77 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12906: a440a6ecfb86e62bbde7077ed784163da920faf1 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a440a6ecfb86 drm/i915/execlists: Don't apply priority boost for resets
7b1444ed17a5 drm/i915: Disable semaphore busywaits on saturated systems
6754404a7ba7 drm/i915: Delay semaphore submission until the start of the signaler
bdd5fb2484ed drm/i915: Only reschedule the submission tasklet if preemption is possible
cf066d2068a7 drm/i915: Wait for the struct_mutex on idling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12906/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-04-29 18:00 ` [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems Chris Wilson
  2019-04-30  8:55   ` Tvrtko Ursulin
@ 2019-05-03 13:25   ` Tvrtko Ursulin
  2019-05-03 14:04   ` Ville Syrjälä
  2 siblings, 0 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2019-05-03 13:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Dmitry Ermilov


On 29/04/2019 19:00, Chris Wilson wrote:
> Asking the GPU to busywait on a memory address, perhaps not unexpectedly
> in hindsight for a shared system, leads to bus contention that affects
> CPU programs trying to concurrently access memory. This can manifest as
> a drop in transcode throughput on highly over-saturated workloads.
> 
> The only clue offered by perf is that the bus-cycles (perf stat -e
> bus-cycles) jumped by 50% when enabling semaphores. This corresponds
> with extra CPU active cycles being attributed to intel_idle's mwait.
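> 
> A concrete invocation along those lines might be (the exact flags are
> illustrative, not taken from this series):
> 
>   $ perf stat -e bus-cycles -a -- sleep 10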
> 
> This patch introduces a heuristic to try and detect when more than one
> client is submitting to the GPU, pushing it into an oversaturated state.
> As we already keep track of when the semaphores are signaled, we can
> inspect their state on submitting the busywait batch: if we planned to
> use a semaphore but were too late, we conclude that the GPU is
> overloaded and do not try to use semaphores in future requests. In
> practice, this means we optimistically try to use semaphores for the
> first frame of a transcode job split over multiple engines; if multiple
> clients are active, that attempt fails and we continue not to use
> semaphores for the subsequent frames in the sequence. Periodically, we
> try to switch semaphores back on optimistically whenever the client
> waits to catch up with the transcode results.
> 
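> In outline, the check amounts to the following (an illustrative
> condensation of the diff below, using the names from this patch):
> 
> 	if (request->sched.semaphores &&
> 	    i915_sw_fence_signaled(&request->semaphore))
> 		request->hw_context->saturated |= request->sched.semaphores;
> 
> with emit_semaphore_wait() then declining to emit a busywait whenever
> the signalling engine's bit is already set in either mask.
> 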
> With 1 client, on Broxton J3455, with the relative fps normalized by %cpu:
> 
> x no semaphores
> + drm-tip
> * throttle
> +------------------------------------------------------------------------+
> |                                                    *                   |
> |                                                    *+                  |
> |                                                    **+                 |
> |                                                    **+  x              |
> |                                x               *  +**+  x              |
> |                                x  x       *    *  +***x xx             |
> |                                x  x       *    * *+***x *x             |
> |                                x  x*   +  *    * *****x *x x           |
> |                         +    x xx+x*   + ***   * ********* x   *       |
> |                         +    x xx+x*   * *** +** ********* xx  *       |
> |    *   +         ++++*  +    x*x****+*+* ***+*************+x*  *       |
> |*+ +** *+ + +* + *++****** *xxx**********x***+*****************+*++    *|
> |                                   |__________A_____M_____|             |
> |                           |_______________A____M_________|             |
> |                                 |____________A___M________|            |
> +------------------------------------------------------------------------+
>      N           Min           Max        Median           Avg        Stddev
> x 120       2.60475       3.50941       3.31123     3.2143953    0.21117399
> + 120        2.3826       3.57077       3.25101     3.1414161    0.28146407
> Difference at 95.0% confidence
> 	-0.0729792 +/- 0.0629585
> 	-2.27039% +/- 1.95864%
> 	(Student's t, pooled s = 0.248814)
> * 120       2.35536       3.66713        3.2849     3.2059917    0.24618565
> No difference proven at 95.0% confidence
> 
> With 10 clients over-saturating the pipeline:
> 
> x no semaphores
> + drm-tip
> * patch
> +------------------------------------------------------------------------+
> |                     ++                                        **       |
> |                     ++                                        **       |
> |                     ++                                        **       |
> |                     ++                                        **       |
> |                     ++                                    xx ***       |
> |                     ++                                    xx ***       |
> |                     ++                                    xxx***       |
> |                     ++                                    xxx***       |
> |                    +++                                    xxx***       |
> |                    +++                                    xx****       |
> |                    +++                                    xx****       |
> |                    +++                                    xx****       |
> |                    +++                                    xx****       |
> |                    ++++                                   xx****       |
> |                   +++++                                   xx****       |
> |                   +++++                                 x x******      |
> |                  ++++++                                 xxx*******     |
> |                  ++++++                                 xxx*******     |
> |                  ++++++                                 xxx*******     |
> |                  ++++++                                 xx********     |
> |                  ++++++                               xxxx********     |
> |                  ++++++                               xxxx********     |
> |                ++++++++                             xxxxx*********     |
> |+ +  +        + ++++++++                           xxx*xx**********x*  *|
> |                                                         |__A__|        |
> |                 |__AM__|                                               |
> |                                                            |__A_|      |
> +------------------------------------------------------------------------+
>      N           Min           Max        Median           Avg        Stddev
> x 120       2.47855        2.8972       2.72376     2.7193402   0.074604933
> + 120       1.17367       1.77459       1.71977     1.6966782   0.085850697
> Difference at 95.0% confidence
> 	-1.02266 +/- 0.0203502
> 	-37.607% +/- 0.748352%
> 	(Student's t, pooled s = 0.0804246)
> * 120       2.57868       3.00821       2.80142     2.7923878   0.058646477
> Difference at 95.0% confidence
> 	0.0730476 +/- 0.0169791
> 	2.68622% +/- 0.624383%
> 	(Student's t, pooled s = 0.0671018)
> 
> This indicates that we've recovered the regression from enabling
> semaphores on this saturated setup, with a hint towards an overall
> improvement.
> 
> Very similar results, of smaller magnitude, are observed on both
> Skylake (gt2) and Kabylake (gt4). This may be due to the reduced impact
> of bus-cycles: where we see a 50% hit on Broxton, it is only 10% on the
> big core in this particular test.
> 
> One observation to make here is that for a greedy client trying to
> maximise its own throughput, using semaphores is the right choice. It is
> only from the holistic system-wide view, in which the semaphores of one
> client impact another and reduce the overall throughput, that we would
> choose to disable semaphores.
> 
> The most noticeable negative impact this has is on the no-op
> microbenchmarks, which are also notable for having no CPU bus load.
> In particular, this increases the runtime and energy consumption of
> gem_exec_whisper.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c       |  2 ++
>   drivers/gpu/drm/i915/gt/intel_context_types.h |  3 ++
>   drivers/gpu/drm/i915/i915_request.c           | 28 ++++++++++++++++++-
>   3 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 1f1761fc6597..5b31e1e05ddd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -116,6 +116,7 @@ intel_context_init(struct intel_context *ce,
>   	ce->engine = engine;
>   	ce->ops = engine->cops;
>   	ce->sseu = engine->sseu;
> +	ce->saturated = 0;
>   
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
> @@ -158,6 +159,7 @@ void intel_context_enter_engine(struct intel_context *ce)
>   
>   void intel_context_exit_engine(struct intel_context *ce)
>   {
> +	ce->saturated = 0;
>   	intel_engine_pm_put(ce->engine);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index d5a7dbd0daee..963a312430e6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -13,6 +13,7 @@
>   #include <linux/types.h>
>   
>   #include "i915_active_types.h"
> +#include "intel_engine_types.h"
>   #include "intel_sseu.h"
>   
>   struct i915_gem_context;
> @@ -52,6 +53,8 @@ struct intel_context {
>   	atomic_t pin_count;
>   	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
>   
> +	intel_engine_mask_t saturated; /* submitting semaphores too late? */
> +
>   	/**
>   	 * active_tracker: Active tracker for the external rq activity
>   	 * on this intel_context object.
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 8cb3ed5531e3..2d429967f403 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -410,6 +410,26 @@ void __i915_request_submit(struct i915_request *request)
>   	if (i915_gem_context_is_banned(request->gem_context))
>   		i915_request_skip(request, -EIO);
>   
> +	/*
> +	 * Are we using semaphores when the gpu is already saturated?
> +	 *
> +	 * Using semaphores incurs a cost in having the GPU poll a
> +	 * memory location, busywaiting for it to change. The continual
> +	 * memory reads can have a noticeable impact on the rest of the
> +	 * system with the extra bus traffic, stalling the cpu as it too
> +	 * tries to access memory across the bus (perf stat -e bus-cycles).
> +	 *
> +	 * If we installed a semaphore on this request and we only submit
> +	 * the request after the signaler completed, that indicates the
> +	 * system is overloaded and using semaphores at this time only
> +	 * increases the amount of work we are doing. If so, we disable
> +	 * further use of semaphores until we are idle again, whence we
> +	 * optimistically try again.
> +	 */
> +	if (request->sched.semaphores &&
> +	    i915_sw_fence_signaled(&request->semaphore))
> +		request->hw_context->saturated |= request->sched.semaphores;
> +
>   	/* We may be recursing from the signal callback of another i915 fence */
>   	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
>   
> @@ -785,6 +805,12 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
>   					     I915_FENCE_GFP);
>   }
>   
> +static intel_engine_mask_t
> +already_busywaiting(struct i915_request *rq)
> +{
> +	return rq->sched.semaphores | rq->hw_context->saturated;
> +}
> +
>   static int
>   emit_semaphore_wait(struct i915_request *to,
>   		    struct i915_request *from,
> @@ -798,7 +824,7 @@ emit_semaphore_wait(struct i915_request *to,
>   	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
>   
>   	/* Just emit the first semaphore we see as request space is limited. */
> -	if (to->sched.semaphores & from->engine->mask)
> +	if (already_busywaiting(to) & from->engine->mask)
>   		return i915_sw_fence_await_dma_fence(&to->submit,
>   						     &from->fence, 0,
>   						     I915_FENCE_GFP);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-04-29 18:00 ` [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems Chris Wilson
  2019-04-30  8:55   ` Tvrtko Ursulin
  2019-05-03 13:25   ` Tvrtko Ursulin
@ 2019-05-03 14:04   ` Ville Syrjälä
  2019-05-03 14:12     ` Chris Wilson
  2 siblings, 1 reply; 18+ messages in thread
From: Ville Syrjälä @ 2019-05-03 14:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Dmitry Ermilov

On Mon, Apr 29, 2019 at 07:00:19PM +0100, Chris Wilson wrote:
> Asking the GPU to busywait on a memory address, perhaps not unexpectedly
> in hindsight for a shared system, leads to bus contention that affects
> CPU programs trying to concurrently access memory. This can manifest as
> a drop in transcode throughput on highly over-saturated workloads.

We can't use the signalling semaphore variant?

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-05-03 14:04   ` Ville Syrjälä
@ 2019-05-03 14:12     ` Chris Wilson
  2019-05-03 14:32       ` Ville Syrjälä
  0 siblings, 1 reply; 18+ messages in thread
From: Chris Wilson @ 2019-05-03 14:12 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx, Dmitry Ermilov

Quoting Ville Syrjälä (2019-05-03 15:04:57)
> On Mon, Apr 29, 2019 at 07:00:19PM +0100, Chris Wilson wrote:
> > Asking the GPU to busywait on a memory address, perhaps not unexpectedly
> > in hindsight for a shared system, leads to bus contention that affects
> > CPU programs trying to concurrently access memory. This can manifest as
> > a drop in transcode throughput on highly over-saturated workloads.
> 
> > We can't use the signalling semaphore variant?

That requires us to broadcast a signal to each engine (for which I can
hear the cries of cross-powerwell wakes), and currently does not work
with execlists + context-id==0. Or at least it failed in my testing.
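
For reference, the poll-mode busywait being disabled here is emitted
along these lines (a sketch of the gen8+ emit_semaphore_wait() path,
not a verbatim excerpt; treat the exact operands as an assumption):

	/* GPU repeatedly reads the hwsp until the seqno is reached */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_GTE_SDD;
	*cs++ = signal_seqno;
	*cs++ = hwsp_offset;
	*cs++ = 0;

The signalling variant would drop MI_SEMAPHORE_POLL and wake the
waiter with an explicit MI_SEMAPHORE_SIGNAL from the signaller, hence
the broadcast above.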
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems
  2019-05-03 14:12     ` Chris Wilson
@ 2019-05-03 14:32       ` Ville Syrjälä
  0 siblings, 0 replies; 18+ messages in thread
From: Ville Syrjälä @ 2019-05-03 14:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Dmitry Ermilov

On Fri, May 03, 2019 at 03:12:01PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjälä (2019-05-03 15:04:57)
> > On Mon, Apr 29, 2019 at 07:00:19PM +0100, Chris Wilson wrote:
> > > Asking the GPU to busywait on a memory address, perhaps not unexpectedly
> > > in hindsight for a shared system, leads to bus contention that affects
> > > CPU programs trying to concurrently access memory. This can manifest as
> > > a drop in transcode throughput on highly over-saturated workloads.
> > 
> > > We can't use the signalling semaphore variant?
> 
> That requires us to broadcast a signal to each engine (for which I can
> hear the cries of cross-powerwell wakes), and currently does not work
> with execlists + context-id==0. Or at least it failed in my testing.

Ah. If only we had MI_MWAIT.

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2019-05-03 14:32 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-29 18:00 [PATCH 1/5] drm/i915: Wait for the struct_mutex on idling Chris Wilson
2019-04-29 18:00 ` [PATCH 2/5] drm/i915: Only reschedule the submission tasklet if preemption is possible Chris Wilson
2019-04-30  9:35   ` [PATCH v2] " Chris Wilson
2019-04-29 18:00 ` [PATCH 3/5] drm/i915: Delay semaphore submission until the start of the signaler Chris Wilson
2019-04-29 18:00 ` [PATCH 4/5] drm/i915: Disable semaphore busywaits on saturated systems Chris Wilson
2019-04-30  8:55   ` Tvrtko Ursulin
2019-04-30  9:04     ` Chris Wilson
2019-04-30  9:07     ` Chris Wilson
2019-05-03 13:25   ` Tvrtko Ursulin
2019-05-03 14:04   ` Ville Syrjälä
2019-05-03 14:12     ` Chris Wilson
2019-05-03 14:32       ` Ville Syrjälä
2019-04-29 18:00 ` [PATCH 5/5] drm/i915/execlists: Don't apply priority boost for resets Chris Wilson
2019-04-29 18:08 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling Patchwork
2019-04-30  7:39 ` ✓ Fi.CI.BAT: success " Patchwork
2019-04-30 11:12 ` ✗ Fi.CI.IGT: failure " Patchwork
2019-04-30 13:12 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/5] drm/i915: Wait for the struct_mutex on idling (rev2) Patchwork
2019-04-30 13:27 ` ✗ Fi.CI.BAT: failure " Patchwork
